2017-03-25 05:00:30 +03:00
|
|
|
# coding=UTF-8
|
|
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
|
|
|
from mercurial import (
|
|
|
|
revlog,
|
2017-03-25 05:01:42 +03:00
|
|
|
util as hgutil,
|
2017-03-25 05:00:30 +03:00
|
|
|
)
|
2017-03-25 05:01:42 +03:00
|
|
|
from mercurial.i18n import _
|
lfs: do not store mercurial filelog metadata in lfs blobs
Summary:
Per discussion with @davidsp, it's better for LFS to not store Mercurial
filelog metadata, which is currently used to store rename information. That has
many advantages:
- Large blobs could be reused across renames
- No need to special handle files starting with `\1\n`
- P4 LFS server implementation is much easier
- remotefilelog LFS support is easier and cleaner
That said, the rename information is stored as lfs metadata using the
non-standard `x-hg-copy`, `x-hg-copyrev` keys. So they still exist and are
functional.
The disadvantage is that rename gets no longer hashed, which is probably fine.
Test Plan: Added a test
Reviewers: davidsp, #sourcecontrol, rmcelroy
Reviewed By: rmcelroy
Subscribers: jsgf, rmcelroy, stash, mjpieters, davidsp
Differential Revision: https://phabricator.intern.facebook.com/D4849764
Signature: t1:4849764:1491580506:1d80ad476b9cbd6773843cb52aee6745f478a0b0
2017-04-08 04:29:35 +03:00
|
|
|
from mercurial.node import bin, nullid
|
2017-03-25 05:00:30 +03:00
|
|
|
|
|
|
|
from . import (
|
|
|
|
blobstore,
|
|
|
|
pointer,
|
|
|
|
util,
|
|
|
|
)
|
|
|
|
|
|
|
|
def supportedoutgoingversions(orig, repo):
    """Restrict outgoing changegroup versions to '03'.

    LFS relies on revlog flags, which only changegroup 3 can carry, so
    drop '01'/'02' from whatever *orig* reports and make sure '03' is
    available.
    """
    supported = orig(repo)
    supported -= set(['01', '02'])
    supported.add('03')
    return supported
|
|
|
|
|
|
|
|
def allsupportedversions(orig, ui):
    """Add changegroup version '03' to the supported set.

    Changegroup 3 carries revlog flags, which LFS uses to mark
    externally stored revisions.
    """
    supported = orig(ui)
    supported.add('03')
    return supported
|
|
|
|
|
|
|
|
def bypasscheckhash(self, text):
    """Tell the flagprocessor to skip the revlog hash check.

    The raw stored text is an LFS pointer, not the real file content,
    so the node hash (computed over the real content) can never match
    it; always report False.
    """
    return False
|
|
|
|
|
|
|
|
def readfromstore(self, text):
    """Read filelog content from local blobstore transform for flagprocessor.

    Default transform for flagprocessor, returning contents from blobstore.
    *text* is the raw revision data, i.e. a serialized LFS pointer; it is
    resolved into one or more store ids, any blobs missing locally are
    fetched from the remote store in a single batch, and the concatenated
    blob contents are returned.

    Returns a 2-tuple (text, validatehash) where validatehash is True as the
    contents of the blobstore should be checked using checkhash.
    """
    if self.opener.options['lfsbypass']:
        # bypass mode: hand the raw pointer text back, unvalidated
        return (text, False)

    metadata = pointer.deserialize(text)
    storeids = metadata.tostoreids()
    store = self.opener.lfslocalblobstore
    if not isinstance(storeids, list):
        storeids = [storeids]
    # Use a list comprehension rather than filter(): on Python 3 filter()
    # returns a lazy iterator, which is always truthy, so the emptiness
    # test below would trigger a remote fetch even with nothing missing.
    # This also avoids shadowing the id() builtin.
    missing = [sid for sid in storeids if not store.has(sid)]
    if missing:
        self.opener.lfsremoteblobstore.readbatch(missing, store)
    text = ''.join([store.read(sid) for sid in storeids])
    return (text, True)
|
2017-03-25 05:00:30 +03:00
|
|
|
|
|
|
|
def writetostore(self, text):
    """Move file content into the blobstore, transform for flagprocessor.

    The content is split into chunks of at most 'lfschunksize' bytes
    (a single chunk when the option is unset), each chunk is written to
    the local blobstore keyed by its sha256, and the revision text is
    replaced with a serialized pointer describing the blob(s).  A
    whitelist of hg filelog metadata (rename tracking) is carried over
    into non-standard 'x-hg-*' pointer keys.

    Returns a 2-tuple (pointertext, False): the pointer text must not be
    validated against the node hash, which covers the real content.
    """
    if self.opener.options['lfsbypass']:
        # bypass mode: keep the text in the filelog as-is
        return (text, False)

    chunksize = self.opener.options['lfschunksize']
    if not chunksize:
        # unchunked: a single blob holding the whole text
        chunksize = len(text)

    chunkoids = []
    # step is forced to >= 1 so empty text yields an empty range
    for start in range(0, len(text), max(chunksize, 1)):
        chunk = text[start:start + chunksize]
        # compute sha256 for git-lfs
        sha = util.sha256(chunk)
        # Store actual contents to local blobstore
        storeid = blobstore.StoreID(sha, len(chunk))
        self.opener.lfslocalblobstore.write(storeid, chunk)
        chunkoids.append(storeid)

    # replace contents with metadata
    hashalgo = 'sha256'
    if len(chunkoids) == 1:
        only = chunkoids[0]
        metadata = pointer.GithubPointer(only.oid, hashalgo, only.size)
    else:
        metadata = pointer.ChunkingPointer(
            chunks=[{'oid': c.oid, 'size': c.size} for c in chunkoids],
            hashalgo=hashalgo,
            size=len(text))

    # hg filelog metadata (includes rename, etc)
    hgmeta = getattr(self, '_filelogmeta', None)
    if hgmeta:
        # only care about a whitelist of hg filelog metadata
        for name in ['copy', 'copyrev']:
            if name in hgmeta:
                metadata['x-hg-%s' % name] = hgmeta[name]

    return (str(metadata), False)
|
|
|
|
|
2017-04-11 23:20:58 +03:00
|
|
|
def _islfs(rlog, node=None, rev=None):
    """Report whether the given revlog revision is stored in LFS.

    Exactly one of *node* or *rev* should be supplied; the other is
    resolved from it.  The null revision is never an LFS revision.
    """
    if rev is None:
        rev = rlog.rev(node)
    else:
        node = rlog.node(rev)
    if node == nullid:
        return False
    # query the flags via the base revlog class, unbound, with rlog as self
    flags = revlog.revlog.flags(rlog, rev)
    return bool(flags & revlog.REVIDX_EXTSTORED)
|
|
|
|
|
|
|
|
def filelogadd(orig, self, text, meta, transaction, link, p1=None, p2=None):
    """Wrap filelog.add() to divert large revisions into LFS.

    When LFS is active and *text* exceeds the configured 'lfsthreshold',
    the revision is added with the EXTSTORED flag so the flagprocessor
    (writetostore) moves the content to the blobstore.  The filelog
    metadata dict *meta* (rename tracking, etc.) is not hashed into the
    revision; it is stashed on self._filelogmeta for the flagprocessor
    to record as pointer metadata instead, simplifying blob handling.
    """
    if not self.opener.options['lfsbypass']:
        threshold = self.opener.options['lfsthreshold']
        if threshold and len(text) > threshold:
            flags = revlog.REVIDX_DEFAULT_FLAGS | revlog.REVIDX_EXTSTORED
            # stash meta for the flagprocessor; always clear it afterwards
            self._filelogmeta = meta
            try:
                return self.addrevision(text, transaction, link, p1, p2,
                                        flags=flags)
            finally:
                self._filelogmeta = None

    return orig(self, text, meta, transaction, link, p1, p2)
|
|
|
|
|
|
|
|
def filelogread(orig, self, node):
    """Wrap filelog.read() for LFS revisions.

    LFS revisions store no inline filelog metadata, so there is no
    '\\1\\n' header to strip; revision() is returned directly.
    """
    if not _islfs(self, node):
        return orig(self, node)
    # no metadata stored, no need to test metadata header ("\1\n")
    return self.revision(node)
|
|
|
|
|
|
|
|
def filelogcmp(orig, self, node, text):
    """Wrap filelog.cmp() for LFS revisions.

    LFS revisions never carry a '\\1\\n' metadata prefix, so when the
    candidate text starts with one, compare against revision() directly
    rather than letting the default implementation escape it.

    Returns True if *text* differs from the revision at *node*.
    """
    if not (text.startswith('\1\n') and _islfs(self, node)):
        return orig(self, node, text)
    # do not prepend '\1\n' in lfs's case, test directly
    return self.revision(node) != text
|
|
|
|
|
|
|
|
def filelogrenamed(orig, self, node):
    """Wrap filelog.renamed() to read rename data from LFS pointers.

    Rename information for LFS revisions lives in the pointer's
    non-standard 'x-hg-copy'/'x-hg-copyrev' keys instead of filelog
    metadata.

    Returns (copiedpath, copiednode) when the revision is a rename,
    otherwise False.
    """
    if not _islfs(self, node):
        return orig(self, node)
    rawtext = self.revision(node, raw=True)
    if not rawtext:
        return False
    metadata = pointer.deserialize(rawtext)
    if 'x-hg-copy' not in metadata or 'x-hg-copyrev' not in metadata:
        return False
    return metadata['x-hg-copy'], bin(metadata['x-hg-copyrev'])
|
2017-03-25 05:00:30 +03:00
|
|
|
|
2017-04-11 23:20:58 +03:00
|
|
|
def filelogsize(orig, self, rev):
    """Wrap filelog.size() to answer from LFS pointer metadata.

    For LFS revisions the size is recorded in the pointer itself, so
    the blob never needs to be fetched or assembled.
    """
    if not _islfs(self, rev=rev):
        return orig(self, rev)
    # fast path: use lfs metadata to answer size
    rawtext = self.revision(rev, raw=True)
    return int(pointer.deserialize(rawtext)['size'])
|
|
|
|
|
2017-04-10 21:11:37 +03:00
|
|
|
def vfsinit(orig, self, othervfs):
    """Wrap vfs.__init__ to propagate LFS state from *othervfs*.

    Copies every option whose name starts with 'lfs' and, when present,
    the local/remote blobstore attributes onto the new vfs.
    """
    orig(self, othervfs)
    # copy lfs related options
    for key, value in othervfs.options.items():
        if key.startswith('lfs'):
            self.options[key] = value
    # also copy lfs blobstores. note: this can run before reposetup, so lfs
    # blobstore attributes are not always ready at this time.
    for attr in ('lfslocalblobstore', 'lfsremoteblobstore'):
        if hgutil.safehasattr(othervfs, attr):
            setattr(self, attr, getattr(othervfs, attr))
|
|
|
|
|
2017-03-25 05:00:30 +03:00
|
|
|
def prepush(pushop):
    """Prepush hook.

    Read through the revisions to push, looking for filelog entries that can be
    deserialized into metadata so that we can block the push on their upload to
    the remote blobstore.

    Raises pointer.PointerDeserializationError (after reporting the offending
    file/revision) if an EXTSTORED revision does not hold a valid pointer.
    """
    repo = pushop.repo
    ui = pushop.ui
    remoterepo = pushop.remote.local()

    # We need to pass on the information to the remote about the threshold so
    # that _peek_islargefile can mark the file as large file.
    threshold = repo.svfs.options.get('lfsthreshold')
    if threshold is not None:
        remoterepo.svfs.options['lfsthreshold'] = threshold

    if ui.verbose:
        ui.write(_('lfs: computing set of blobs to upload\n'))
    toupload = []
    totalsize = 0
    for n in pushop.outgoing.missing:
        ctx = repo[n]
        files = set(ctx.files())
        for f in files:
            # files() can list paths not present in this revision
            if f not in ctx:
                continue
            filectx = ctx[f]
            flags = filectx.filelog().flags(filectx.filerev())
            if flags & revlog.REVIDX_EXTSTORED != revlog.REVIDX_EXTSTORED:
                continue
            try:
                # reuse the filectx bound above instead of a second ctx[f]
                metadata = pointer.deserialize(filectx.rawdata())
                totalsize += long(metadata['size'])
                storeids = metadata.tostoreids()
                if isinstance(storeids, list):
                    toupload.extend(storeids)
                else:
                    toupload.append(storeids)
            except pointer.PointerDeserializationError:
                msg = _('lfs: could not deserialize pointer for file %s, '
                        'revision %s\n')
                ui.write(msg % (f, filectx.filerev()))
                raise

    if not toupload:
        return

    if ui.verbose:
        msg = _('lfs: need to upload %s objects (%s)\n')
        ui.write(msg % (len(toupload), hgutil.bytecount(totalsize)))

    remoteblob = repo.svfs.lfsremoteblobstore
    remoteblob.writebatch(toupload, repo.svfs.lfslocalblobstore,
                          total=totalsize)
|