mirror of
https://github.com/facebook/sapling.git
synced 2025-01-06 04:43:19 +03:00
snapshot: bundle metadata and the related files
Summary: Instead of using the LFS remote storage, we chose to send the snapshot metadata via bundles. Snapshot metadata consists of the actual metadata blob plus several other blobs (untracked files etc.). If a single bundle contains several snapshot revisions, the blobs could repeat, so each blob is stored as a separate entry in a binary stream, keeping its id and contents.

Here we introduce a new bundle part type, `"b2x:snapshotmetadata"` (available in code as `snapshotmetadataparttype`).

```
1 byte of version info
[ # a list of binary entries, each corresponds to a separate file
  # (either a metadata file itself or a related -- externally stored -- file)
  <oid><length><data>
  :oid: is a 64-char string with the hash of the file
  :length: is an unsigned int with the length of the data
  :data: is binary data of length <length>, the actual file contents
]
```

The exact serialization format is still under discussion; the current state is described in [the quip doc](https://fb.quip.com/R5OVAzabX8oo).

Reviewed By: markbt
Differential Revision: D17184222
fbshipit-source-id: 90f833ec71556e90d513e3be3f3efa7f870b037d
This commit is contained in:
parent
bcd08fcda3
commit
22dce8230d
@ -144,6 +144,19 @@ def getscratchbranchparts(
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
try:
|
||||
snapshot = extensions.find("snapshot")
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
data = snapshot.bundleparts.getmetadatafromrevs(repo, outgoing.missing)
|
||||
if data:
|
||||
parts.append(
|
||||
bundle2.bundlepart(
|
||||
snapshot.bundleparts.snapshotmetadataparttype, data=data
|
||||
)
|
||||
)
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
|
@ -465,6 +465,12 @@ def processparts(orig, repo, op, unbundler):
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
try:
|
||||
snapshot = extensions.find("snapshot")
|
||||
partforwardingwhitelist.append(snapshot.bundleparts.snapshotmetadataparttype)
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
bundler = bundle2.bundle20(repo.ui)
|
||||
compress = repo.ui.config("infinitepush", "bundlecompression", "UN")
|
||||
bundler.setcompression(compress)
|
||||
|
@ -20,12 +20,16 @@ Configs::
|
||||
[ui]
|
||||
# Allow to run `hg checkout` for snapshot revisions
|
||||
allow-checkout-snapshot = False
|
||||
|
||||
[snapshot]
|
||||
# Sync snapshot metadata via bundle2
|
||||
enable-sync-bundle = False
|
||||
"""
|
||||
|
||||
from edenscm.mercurial import error, extensions, hg, registrar
|
||||
from edenscm.mercurial.i18n import _
|
||||
|
||||
from . import blobstore, cmds as snapshotcommands, metadata
|
||||
from . import blobstore, bundleparts, cmds as snapshotcommands, metadata
|
||||
|
||||
|
||||
cmdtable = snapshotcommands.cmdtable
|
||||
@ -33,6 +37,11 @@ cmdtable = snapshotcommands.cmdtable
|
||||
configtable = {}
|
||||
configitem = registrar.configitem(configtable)
|
||||
configitem("ui", "allow-checkout-snapshot", default=False)
|
||||
configitem("snapshot", "enable-sync-bundle", default=False)
|
||||
|
||||
|
||||
def uisetup(ui):
    """Extension ui setup hook: forward ``ui`` to ``bundleparts.uisetup``."""
    bundleparts.uisetup(ui)
|
||||
|
||||
|
||||
def reposetup(ui, repo):
|
||||
|
114
edenscm/hgext/snapshot/bundleparts.py
Normal file
114
edenscm/hgext/snapshot/bundleparts.py
Normal file
@ -0,0 +1,114 @@
|
||||
# Copyright 2019 Facebook, Inc.
|
||||
#
|
||||
# This software may be used and distributed according to the terms of the
|
||||
# GNU General Public License version 2 or any later version.
|
||||
|
||||
import struct
|
||||
|
||||
from edenscm.mercurial import bundle2, error
|
||||
from edenscm.mercurial.i18n import _
|
||||
|
||||
from . import metadata
|
||||
|
||||
|
||||
snapshotmetadataparttype = "b2x:snapshotmetadata"
|
||||
|
||||
|
||||
def uisetup(ui):
    """Register the snapshot metadata part type as a bundle2 capability.

    Nothing is registered unless the ``snapshot.enable-sync-bundle`` config
    option is set.
    """
    if not ui.configbool("snapshot", "enable-sync-bundle"):
        return
    bundle2.capabilities[snapshotmetadataparttype] = ()
|
||||
|
||||
|
||||
def getmetadatafromrevs(repo, revs):
    """Build the binary snapshot metadata payload for a list of revs.

    Each rev is looked up in the unfiltered repo and the value stored under
    the ``snapshotmetadataid`` key of its extras (when present and non-empty)
    is collected.

    Returns None if none of the revs reference snapshot metadata; otherwise
    returns the result of ``binaryencode`` for the collected ids.

    Aborts if a rev is not found in the unfiltered repo.
    """
    unfiltered = repo.unfiltered()
    collected = set()
    for rev in revs:
        # TODO(alexeyqu): move this check into a function
        if rev not in unfiltered:
            raise error.Abort(_("%s not found in repo\n") % rev)
        metadataid = unfiltered[rev].extra().get("snapshotmetadataid", None)
        if metadataid:
            collected.add(metadataid)
    return binaryencode(repo, collected) if collected else None
|
||||
|
||||
|
||||
@bundle2.parthandler(snapshotmetadataparttype)
def handlemetadata(op, inpart):
    """bundle2 part handler for the snapshot metadata part

    Passes the incoming part to ``binarydecode`` together with the repo of
    the bundle operation; the list of ids it returns is discarded.
    """
    repo = op.repo
    binarydecode(repo, inpart)
|
||||
|
||||
|
||||
_versionentry = struct.Struct(">B")
|
||||
_binaryentry = struct.Struct(">64sI")
|
||||
|
||||
|
||||
def binaryencode(repo, metadataids):
    """serialize snapshot metadata (and the files it mentions) into a string

    the binary format is:
        <version-byte>[<chunk-id><chunk-length><chunk-content>]+

    :version-byte: is a version byte
    :chunk-id: is a string of 64 chars -- sha256 of the chunk
    :chunk-length: is an unsigned int
    :chunk-content: is the metadata contents (of length <chunk-length>)

    Each id in ``metadataids`` is loaded from the local storage and written
    as one chunk; the auxiliary file ids reported by the loaded metadata are
    gathered into a set and written afterwards, one chunk per id.
    """
    snapshotmetadata = metadata.snapshotmetadata

    # the stream always starts with the version info
    chunks = [_versionentry.pack(snapshotmetadata.VERSION)]
    auxfileids = set()

    def _append(oid, data):
        # entry header (id + payload length) followed by the payload itself
        chunks.append(_binaryentry.pack(oid, len(data)))
        chunks.append(data)

    # first come the metadata files themselves
    for metadataid in metadataids:
        snapmetadata = snapshotmetadata.getfromlocalstorage(repo, metadataid)
        auxfileids.update(snapmetadata.getauxfileids())
        _append(metadataid, snapmetadata.serialize())

    # then the files that are mentioned in the metadata
    for auxfileid in auxfileids:
        _append(auxfileid, repo.svfs.snapshotstore.read(auxfileid))

    return "".join(chunks)
|
||||
|
||||
|
||||
def binarydecode(repo, stream):
    """decode a binary stream into individual blobs and store them

    Returns a list of file ids, in the order they appear in the stream.

    the binary format is:
        <version-byte>[<chunk-id><chunk-length><chunk-content>]+

    :version-byte: is a version byte
    :chunk-id: is a string of 64 chars -- sha256 of the chunk
    :chunk-length: is an unsigned int
    :chunk-content: is the metadata contents (of length <chunk-length>)

    Every decoded chunk is written to ``repo.svfs.snapshotstore`` under its
    chunk id.

    Aborts if the version byte is missing or does not match
    ``metadata.snapshotmetadata.VERSION``, or if the stream ends in the
    middle of an entry header or of a chunk's content.
    """
    # check the version info
    header = stream.read(_versionentry.size)
    if len(header) < _versionentry.size:
        # report an empty/short stream like any other malformed stream
        # instead of leaking a raw struct.error from unpack()
        raise error.Abort(_("bad snapshot metadata stream"))
    version = _versionentry.unpack(header)[0]
    if version != metadata.snapshotmetadata.VERSION:
        raise error.Abort(_("invalid version number %d") % version)

    entrysize = _binaryentry.size
    fileids = []
    while True:
        entry = stream.read(entrysize)
        if not entry:
            # clean end of stream: it stopped exactly on an entry boundary
            break
        if len(entry) < entrysize:
            raise error.Abort(_("bad snapshot metadata stream"))
        oid, length = _binaryentry.unpack(entry)
        data = stream.read(length)
        # Any short read is a truncation, including the case where nothing
        # at all could be read for a non-empty chunk; storing the partial
        # (or empty) content under this id would corrupt the blob.
        if len(data) != length:
            raise error.Abort(_("bad snapshot metadata stream"))
        repo.svfs.snapshotstore.write(oid, data)
        fileids.append(oid)
    return fileids
|
@ -115,6 +115,12 @@ class snapshotmetadata(object):
|
||||
except ValueError:
|
||||
raise error.Abort(_("invalid metadata json: %s\n") % json_string)
|
||||
|
||||
def getauxfileids(self):
    """Return the set of ``oid`` values of every entry in ``self.unknown``
    and ``self.localvfsfiles``."""
    return {f.oid for f in self.unknown} | {f.oid for f in self.localvfsfiles}
|
||||
|
||||
@classmethod
|
||||
def createfromworkingcopy(cls, repo, status=None, include_untracked=True):
|
||||
metadata = cls(repo)
|
||||
|
149
tests/test-fb-hgext-snapshot-backup.t
Normal file
149
tests/test-fb-hgext-snapshot-backup.t
Normal file
@ -0,0 +1,149 @@
|
||||
# Initial setup
|
||||
$ setconfig extensions.lfs=
|
||||
$ setconfig extensions.rebase=
|
||||
$ setconfig extensions.snapshot=
|
||||
$ setconfig extensions.treemanifest=!
|
||||
$ setconfig visibility.enabled=true
|
||||
$ . "$TESTDIR/library.sh"
|
||||
$ . "$TESTDIR/infinitepush/library.sh"
|
||||
$ setupcommon
|
||||
$ setconfig infinitepushbackup.logdir="$TESTTMP/logs" infinitepushbackup.hostname=testhost
|
||||
$ setconfig snapshot.enable-sync-bundle=true
|
||||
|
||||
# Setup server
|
||||
$ hg init server
|
||||
$ cd server
|
||||
$ setupserver
|
||||
$ cd ..
|
||||
|
||||
# Setup clients
|
||||
$ hg clone -q ssh://user@dummy/server client
|
||||
$ hg clone -q ssh://user@dummy/server restored
|
||||
$ cd client
|
||||
$ hg debugvisibility start
|
||||
|
||||
# Add a file to the store
|
||||
$ echo "foo" > foofile
|
||||
$ mkdir bar
|
||||
$ echo "bar" > bar/file
|
||||
$ hg add foofile bar/file
|
||||
$ hg commit -m "add some files"
|
||||
$ hg push
|
||||
pushing to ssh://user@dummy/server
|
||||
searching for changes
|
||||
remote: adding changesets
|
||||
remote: adding manifests
|
||||
remote: adding file changes
|
||||
remote: added 1 changesets with 2 changes to 2 files
|
||||
|
||||
# Call this state a base revision
|
||||
$ BASEREV="$(hg id -i)"
|
||||
$ echo "$BASEREV"
|
||||
3490593cf53c
|
||||
|
||||
|
||||
# Snapshot backup test plan:
|
||||
# 1) Create a snapshot, back it up + restore on another client
|
||||
|
||||
|
||||
# 1) Create a snapshot, back it up + restore on another client
|
||||
# Setup the environment
|
||||
$ echo "a" > mergefile
|
||||
$ hg add mergefile
|
||||
$ hg commit -m "merge #1"
|
||||
$ MERGEREV="$(hg id -i)"
|
||||
$ hg checkout "$BASEREV"
|
||||
0 files updated, 0 files merged, 1 files removed, 0 files unresolved
|
||||
$ echo "b" > mergefile
|
||||
$ hg add mergefile
|
||||
$ hg commit -m "merge #2"
|
||||
$ hg merge "$MERGEREV"
|
||||
merging mergefile
|
||||
warning: 1 conflicts while merging mergefile! (edit, then use 'hg resolve --mark')
|
||||
0 files updated, 0 files merged, 0 files removed, 1 files unresolved
|
||||
use 'hg resolve' to retry unresolved file merges or 'hg update -C .' to abandon
|
||||
[1]
|
||||
$ hg rm bar/file
|
||||
$ rm foofile
|
||||
$ echo "another" > bazfile
|
||||
$ hg add bazfile
|
||||
$ echo "fizz" > untrackedfile
|
||||
$ BEFORESTATUS="$(hg status --verbose)"
|
||||
$ echo "$BEFORESTATUS"
|
||||
M mergefile
|
||||
A bazfile
|
||||
R bar/file
|
||||
! foofile
|
||||
? mergefile.orig
|
||||
? untrackedfile
|
||||
# The repository is in an unfinished *merge* state.
|
||||
|
||||
# Unresolved merge conflicts:
|
||||
#
|
||||
# mergefile
|
||||
#
|
||||
# To mark files as resolved: hg resolve --mark FILE
|
||||
|
||||
# To continue: hg commit
|
||||
# To abort: hg update --clean . (warning: this will discard uncommitted changes)
|
||||
$ BEFOREDIFF="$(hg diff)"
|
||||
$ echo "$BEFOREDIFF"
|
||||
diff -r 6eb2552aed20 bar/file
|
||||
--- a/bar/file Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -1,1 +0,0 @@
|
||||
-bar
|
||||
diff -r 6eb2552aed20 bazfile
|
||||
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/bazfile Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -0,0 +1,1 @@
|
||||
+another
|
||||
diff -r 6eb2552aed20 mergefile
|
||||
--- a/mergefile Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/mergefile Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -1,1 +1,5 @@
|
||||
+<<<<<<< working copy: 6eb2552aed20 - test: merge #2
|
||||
b
|
||||
+=======
|
||||
+a
|
||||
+>>>>>>> merge rev: f473d4d5a1c0 - test: merge #1
|
||||
|
||||
# Make a snapshot
|
||||
$ OID="$(hg snapshot create | cut -f2 -d' ')"
|
||||
$ echo "$OID"
|
||||
aaa7692160b6c5c0e4c13787d9343cf89fc2311a
|
||||
|
||||
# Back it up
|
||||
$ hg cloud backup --hidden -r "$OID"
|
||||
backing up stack rooted at f473d4d5a1c0
|
||||
remote: pushing 3 commits:
|
||||
remote: f473d4d5a1c0 merge #1
|
||||
remote: 6eb2552aed20 merge #2
|
||||
remote: aaa7692160b6 snapshot
|
||||
backing up stack rooted at 6eb2552aed20
|
||||
remote: pushing 3 commits:
|
||||
remote: f473d4d5a1c0 merge #1
|
||||
remote: 6eb2552aed20 merge #2
|
||||
remote: aaa7692160b6 snapshot
|
||||
commitcloud: backed up 3 commits
|
||||
|
||||
# Restore it on another client
|
||||
$ cd ../restored
|
||||
$ hg pull -r "$OID"
|
||||
pulling from ssh://user@dummy/server
|
||||
adding changesets
|
||||
adding manifests
|
||||
adding file changes
|
||||
added 1 changesets with 2 changes to 2 files
|
||||
adding changesets
|
||||
adding manifests
|
||||
adding file changes
|
||||
added 3 changesets with 4 changes to 2 files
|
||||
new changesets 3490593cf53c:aaa7692160b6
|
||||
$ hg snapshot checkout "$OID"
|
||||
will checkout on aaa7692160b6c5c0e4c13787d9343cf89fc2311a
|
||||
3 files updated, 0 files merged, 0 files removed, 0 files unresolved
|
||||
checkout complete
|
||||
# hg status/diff are unchanged
|
||||
$ test "$BEFORESTATUS" = "$(hg status --verbose)"
|
||||
$ test "$BEFOREDIFF" = "$(hg diff)"
|
Loading…
Reference in New Issue
Block a user