sapling/hgext3rd/lfs/wrapper.py
Jun Wu 0d6151f530 remotefilelog: add lfs integration test
Summary:
The test covers common workflows like clone, commit, push, update, and pull. It
exercises the remotefilelog plain store and the Python datapack store to make
sure they won't lose the revlog flag. The test also verifies that renames work
correctly.

Since the lfs extension may eventually be upstreamed, it seems like a good idea
to make remotefilelog call `lfs.wrapfilelog` so lfs stays free of remotefilelog
code.
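
A rough sketch of that call from the remotefilelog side (only the name
`lfs.wrapfilelog` comes from this plan; the import path and class name below
are assumptions for illustration):

    try:
        from hgext3rd import lfs
        # wrap remotefilelog's filelog substitute the same way lfs wraps
        # the core filelog, so the EXTSTORED flag survives in shallow repos
        lfs.wrapfilelog(remotefilelog.remotefilelog)
    except ImportError:
        pass  # lfs extension not installed; run without lfs support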

Test Plan: Added a test

Reviewers: #mercurial, durham

Reviewed By: durham

Subscribers: mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D4904281

Signature: t1:4904281:1492560308:5fd9f214ada6de795735ea7d737d30c1bf39812a
2017-04-26 19:55:02 -07:00

# coding=UTF-8

from __future__ import absolute_import

from mercurial import (
    filelog,
    revlog,
    util as hgutil,
)
from mercurial.i18n import _
from mercurial.node import nullid

from . import (
    blobstore,
    pointer,
    util,
)

def supportedoutgoingversions(orig, repo):
    versions = orig(repo)
    versions.discard('01')
    versions.discard('02')
    versions.add('03')
    return versions

def allsupportedversions(orig, ui):
    versions = orig(ui)
    versions.add('03')
    return versions

def bypasscheckhash(self, text):
    return False

def readfromstore(self, text):
    """Read filelog content from the local blobstore (transform for the
    flagprocessor).

    Default transform for the flagprocessor, returning contents from the
    blobstore. Returns a 2-tuple (text, validatehash) where validatehash
    tells the caller whether the contents should be checked using checkhash
    (it is False when lfsbypass is set).
    """
    metadata = pointer.deserialize(text)
    verifyhash = False
    # if bypass is set, do not read the remote blobstore and skip the hash
    # check, but still write hg filelog metadata
    if not self.opener.options['lfsbypass']:
        verifyhash = True
        storeids = metadata.tostoreids()
        store = self.opener.lfslocalblobstore
        if not isinstance(storeids, list):
            storeids = [storeids]
        missing = filter(lambda id: not store.has(id), storeids)
        if missing:
            self.opener.lfsremoteblobstore.readbatch(missing, store)
        text = ''.join([store.read(id) for id in storeids])

    # pack hg filelog metadata
    hgmeta = {}
    for k in metadata.keys():
        if k.startswith('x-hg-'):
            name = k[len('x-hg-'):]
            hgmeta[name] = metadata[k]
    if hgmeta or text.startswith('\1\n'):
        text = filelog.packmeta(hgmeta, text)

    return (text, verifyhash)

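# Illustrative sketch (not part of the original module): assuming
# GithubPointer serializes to the standard git-lfs pointer format, the raw
# filelog text for a renamed file could look roughly like:
#
#   version https://git-lfs.github.com/spec/v1
#   oid sha256:4d7a21...
#   size 42
#   x-hg-copy old/name.txt
#   x-hg-copyrev <40-hex filelog node>
#
# readfromstore() turns such a pointer back into real contents: it fetches
# the blob (from the remote store if missing locally) and re-packs the
# x-hg-* keys as hg filelog metadata via filelog.packmeta.
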
def writetostore(self, text):
    if self.opener.options['lfsbypass']:
        return (text, False)

    # hg filelog metadata (includes rename, etc)
    hgmeta, offset = filelog.parsemeta(text)
    if offset and offset > 0:
        # lfs blob does not contain hg filelog metadata
        text = text[offset:]
    offset = 0

    chunkoids = []
    chunksize = self.opener.options['lfschunksize']
    if not chunksize:
        chunksize = len(text)
    while offset < len(text):
        chunk = text[offset:offset + chunksize]  # slicing past the end is safe
        chunklen = len(chunk)
        # compute sha256 for git-lfs
        sha = util.sha256(chunk)
        # Store actual contents to local blobstore
        storeid = blobstore.StoreID(sha, chunklen)
        self.opener.lfslocalblobstore.write(storeid, chunk)
        chunkoids.append(storeid)
        offset += chunklen

    # replace contents with metadata
    hashalgo = 'sha256'
    if len(chunkoids) == 1:
        storeid = chunkoids[0]
        metadata = pointer.GithubPointer(storeid.oid, hashalgo, storeid.size)
    else:
        metadata = pointer.ChunkingPointer(
            chunks=[{'oid': v.oid, 'size': v.size} for v in chunkoids],
            hashalgo=hashalgo,
            size=len(text))

    # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
    if hgmeta is not None:
        for k, v in hgmeta.iteritems():
            metadata['x-hg-%s' % k] = v

    text = str(metadata)
    return (text, False)

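# Worked example for the chunking above (illustrative numbers): with
# lfschunksize = 4 and a 10-byte text 'abcdefghij', three blobs are written
# ('abcd', 'efgh', 'ij') and the resulting ChunkingPointer records
#
#   chunks = [{'oid': sha256('abcd'), 'size': 4},
#             {'oid': sha256('efgh'), 'size': 4},
#             {'oid': sha256('ij'), 'size': 2}]
#   size = 10
#
# With lfschunksize unset, chunksize defaults to len(text), the loop runs
# once, and a single-blob GithubPointer is emitted instead.
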
def _islfs(rlog, node=None, rev=None):
    if rev is None:
        rev = rlog.rev(node)
    else:
        node = rlog.node(rev)
    if node == nullid:
        return False
    flags = rlog.flags(rev)
    return bool(flags & revlog.REVIDX_EXTSTORED)

def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                       cachedelta=None, node=None,
                       flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
    if not self.opener.options['lfsbypass']:
        threshold = self.opener.options['lfsthreshold']
        textlen = len(text)
        # exclude hg rename meta from file size
        meta, offset = filelog.parsemeta(text)
        if offset:
            textlen -= offset
        if threshold and textlen > threshold:
            flags |= revlog.REVIDX_EXTSTORED

    return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
                node=node, flags=flags, **kwds)

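# Example of the threshold check (illustrative numbers): with
# lfsthreshold = 1000, a 1500-byte text gets REVIDX_EXTSTORED here and the
# flagprocessor then routes it through writetostore(), so only a pointer
# lands in the revlog; a 500-byte text is stored inline as usual. Rename
# metadata is subtracted first, so a '\1\n...\1\n' prefix cannot push a
# small file over the threshold.
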
def filelogsize(orig, self, rev):
    if _islfs(self, rev=rev):
        # fast path: use lfs metadata to answer size
        rawtext = self.revision(rev, raw=True)
        metadata = pointer.deserialize(rawtext)
        return int(metadata['size'])
    return orig(self, rev)

def vfsinit(orig, self, othervfs):
    orig(self, othervfs)
    # copy lfs related options
    for k, v in othervfs.options.items():
        if k.startswith('lfs'):
            self.options[k] = v
    # also copy lfs blobstores. note: this can run before reposetup, so lfs
    # blobstore attributes are not always ready at this time.
    for name in ['lfslocalblobstore', 'lfsremoteblobstore']:
        if hgutil.safehasattr(othervfs, name):
            setattr(self, name, getattr(othervfs, name))

def prepush(pushop):
    """Prepush hook.

    Read through the revisions to push, looking for filelog entries that can
    be deserialized into metadata so that we can block the push on their
    upload to the remote blobstore.
    """
    repo = pushop.repo
    ui = pushop.ui
    remoterepo = pushop.remote.local()

    # We need to pass the threshold on to the remote so that
    # _peek_islargefile can mark the file as a large file.
    threshold = repo.svfs.options.get('lfsthreshold')
    if threshold is not None:
        remoterepo.svfs.options['lfsthreshold'] = threshold

    if ui.verbose:
        ui.write(_('lfs: computing set of blobs to upload\n'))
    toupload = []
    totalsize = 0
    for i, n in enumerate(pushop.outgoing.missing):
        ctx = repo[n]
        files = set(ctx.files())
        for f in files:
            if f not in ctx:
                continue
            filectx = ctx[f]
            flags = filectx.filelog().flags(filectx.filerev())
            if (flags & revlog.REVIDX_EXTSTORED) != revlog.REVIDX_EXTSTORED:
                continue
            try:
                metadata = pointer.deserialize(ctx[f].rawdata())
                totalsize += long(metadata['size'])
                storeids = metadata.tostoreids()
                if isinstance(storeids, list):
                    toupload.extend(storeids)
                else:
                    toupload.append(storeids)
            except pointer.PointerDeserializationError:
                msg = _('lfs: could not deserialize pointer for file %s, '
                        'revision %s\n')
                ui.write(msg % (f, filectx.filerev()))
                raise

    if not toupload:
        return

    if ui.verbose:
        msg = _('lfs: need to upload %s objects (%s)\n')
        ui.write(msg % (len(toupload), hgutil.bytecount(totalsize)))

    remoteblob = repo.svfs.lfsremoteblobstore
    remoteblob.writebatch(toupload, repo.svfs.lfslocalblobstore,
                          total=totalsize)
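
# Example of what this hook produces on a verbose push (illustrative
# transcript; the object count and size are made up):
#
#   $ hg push --verbose
#   lfs: computing set of blobs to upload
#   lfs: need to upload 3 objects (27.5 MB)
#
# writebatch() blocks until every referenced blob is on the remote
# blobstore, so the changegroup is only exchanged once its blobs exist
# remotely.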