2017-05-17 01:44:06 +03:00
|
|
|
# wrapper.py - methods wrapping core mercurial logic
|
|
|
|
#
|
|
|
|
# Copyright 2017 Facebook, Inc.
|
|
|
|
#
|
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
|
# GNU General Public License version 2 or any later version.
|
2017-03-25 05:00:30 +03:00
|
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
2017-05-17 01:43:36 +03:00
|
|
|
import hashlib
|
|
|
|
|
2017-03-25 05:00:30 +03:00
|
|
|
from mercurial import (
|
2017-05-08 21:20:50 +03:00
|
|
|
error,
|
2017-04-22 05:56:27 +03:00
|
|
|
filelog,
|
2017-03-25 05:00:30 +03:00
|
|
|
revlog,
|
2017-05-17 01:43:36 +03:00
|
|
|
util,
|
2017-03-25 05:00:30 +03:00
|
|
|
)
|
2017-03-25 05:01:42 +03:00
|
|
|
from mercurial.i18n import _
|
2017-05-08 21:20:50 +03:00
|
|
|
from mercurial.node import bin, nullid, short
|
2017-03-25 05:00:30 +03:00
|
|
|
|
|
|
|
from . import (
|
2017-05-19 01:10:39 +03:00
|
|
|
blobstore,
|
2017-03-25 05:00:30 +03:00
|
|
|
pointer,
|
|
|
|
)
|
|
|
|
|
|
|
|
def supportedoutgoingversions(orig, repo):
    """Force changegroup version '03' for outgoing data.

    LFS needs revlog flags to travel over the wire, which only the '03'
    changegroup format carries, so the older formats are removed from the
    supported set.
    """
    versions = orig(repo)
    versions.difference_update(['01', '02'])
    versions.add('03')
    return versions
|
|
|
|
|
|
|
|
def allsupportedversions(orig, ui):
    """Advertise changegroup version '03' in addition to the defaults.

    Unlike supportedoutgoingversions, this does not remove the older
    formats - it only extends the supported set.
    """
    versions = orig(ui) | set(['03'])
    return versions
|
|
|
|
|
|
|
|
def bypasscheckhash(self, text):
    """Flag processor raw-apply transform: never bypass hash checking.

    Returning False tells the revlog flag processor that raw text must
    still be validated against the stored hash.
    """
    return False
|
|
|
|
|
|
|
|
def readfromstore(self, text):
    """Read filelog content from local blobstore transform for flagprocessor.

    Default transform for flagprocessor, returning contents from blobstore.
    Returns a 2-tuple (text, validatehash) where validatehash is True as the
    contents of the blobstore should be checked using checkhash.
    """
    p = pointer.deserialize(text)
    oid = p.oid()
    store = self.opener.lfslocalblobstore
    # fetch from the remote store on a local cache miss
    if not store.has(oid):
        self.opener.lfsremoteblobstore.readbatch([p], store)
    text = store.read(oid)

    # re-pack hg filelog metadata (rename info etc.) that was stashed in the
    # pointer under "x-hg-" prefixed keys
    hgmeta = dict((k[len('x-hg-'):], p[k])
                  for k in p.keys() if k.startswith('x-hg-'))
    # also wrap plain content that happens to start with the metadata marker
    if hgmeta or text.startswith('\1\n'):
        text = filelog.packmeta(hgmeta, text)

    return (text, True)
|
2017-03-25 05:00:30 +03:00
|
|
|
|
|
|
|
def writetostore(self, text):
    """Write transform for flagprocessor: store content as an lfs blob.

    Strips hg filelog metadata before hashing so identical blobs are shared
    across renames, writes the blob to the local store, and returns a
    2-tuple (pointer-text, validatehash=False).
    """
    # hg filelog metadata (includes rename, etc) is kept out of the blob
    hgmeta, offset = filelog.parsemeta(text)
    if offset and offset > 0:
        text = text[offset:]

    # git-lfs only supports sha256
    oid = hashlib.sha256(text).hexdigest()
    self.opener.lfslocalblobstore.write(oid, text)

    # the filelog revision body becomes a pointer to the blob
    longoid = 'sha256:%s' % oid
    metadata = pointer.gitlfspointer(oid=longoid, size=str(len(text)))

    # by default, we expect the content to be binary. however, LFS could also
    # be used for non-binary content. add a special entry for non-binary data.
    # this will be used by filectx.isbinary().
    if not util.binary(text):
        # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
        metadata['x-is-binary'] = '0'

    # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
    if hgmeta is not None:
        for name, value in hgmeta.iteritems():
            metadata['x-hg-%s' % name] = value

    return (metadata.serialize(), False)
|
2017-03-25 05:00:30 +03:00
|
|
|
|
2017-04-11 23:20:58 +03:00
|
|
|
def _islfs(rlog, node=None, rev=None):
    """Return True if the given revision of rlog is stored as an lfs pointer.

    Accepts either a node or a rev; whichever is missing is resolved from
    the other.
    """
    if rev is None and node is None:
        # both None - likely working copy content where node is not ready
        return False
    if rev is None:
        rev = rlog.rev(node)
    else:
        node = rlog.node(rev)
    if node == nullid:
        return False
    return bool(rlog.flags(rev) & revlog.REVIDX_EXTSTORED)
|
|
|
|
|
2017-04-22 05:56:27 +03:00
|
|
|
def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                       cachedelta=None, node=None,
                       flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
    """Mark revisions larger than the configured threshold as ext-stored.

    The EXTSTORED revlog flag routes the revision through the lfs
    flag-processor write transform.
    """
    threshold = self.opener.options['lfsthreshold']
    # exclude hg rename meta from file size
    _meta, offset = filelog.parsemeta(text)
    textlen = len(text) - (offset or 0)

    if threshold and textlen > threshold:
        flags |= revlog.REVIDX_EXTSTORED

    return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
                node=node, flags=flags, **kwds)
|
2017-03-25 05:00:30 +03:00
|
|
|
|
lfs: override filelog.renamed code path
Summary:
An `hg pull` test triggers the following code path server-side when using
remotefilelog:
...
remote: File "/usr/lib64/python2.7/site-packages/remotefilelog/remotefilelogserver.py", line 308, in streamer
remote: text = _loadfileblob(repo, cachepath, path, node)
remote: File "/usr/lib64/python2.7/site-packages/remotefilelog/remotefilelogserver.py", line 223, in _loadfileblob
remote: text = createfileblob(filectx)
remote: File "/usr/lib64/python2.7/site-packages/remotefilelog/remotefilelogserver.py", line 348, in createfileblob
remote: ancestors.extend([f for f in filectx.ancestors()])
remote: File "/usr/lib64/python2.7/site-packages/mercurial/context.py", line 1072, in ancestors
remote: for parent in c.parents()[:cut]:
remote: File "/usr/lib64/python2.7/site-packages/mercurial/context.py", line 923, in parents
remote: r = fl.renamed(self._filenode)
remote: File "/usr/lib64/python2.7/site-packages/mercurial/filelog.py", line 62, in renamed
remote: t = self.revision(node)
...
That triggers downloading a blob. We don't want to do that server-side. So
override the `renamed` method to use LFS fast path to test rename.
Practically, this reverts part of D4906074.
Test Plan: Run existing tests. This change was made on the server.
Reviewers: davidsp, #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D4992421
Signature: t1:4992421:1493802628:2bf2cf819bfed2aa61ea1c2323c03ab428732815
2017-05-03 21:10:48 +03:00
|
|
|
def filelogrenamed(orig, self, node):
    """Answer rename queries from lfs pointer metadata when possible.

    Avoids downloading the blob just to read rename info: the copy source
    is stashed in the pointer under the non-standard "x-hg-copy" /
    "x-hg-copyrev" keys.
    """
    if not _islfs(self, node):
        return orig(self, node)
    rawtext = self.revision(node, raw=True)
    if not rawtext:
        return False
    metadata = pointer.deserialize(rawtext)
    if 'x-hg-copy' not in metadata or 'x-hg-copyrev' not in metadata:
        return False
    return (metadata['x-hg-copy'], bin(metadata['x-hg-copyrev']))
|
|
|
|
|
2017-04-11 23:20:58 +03:00
|
|
|
def filelogsize(orig, self, rev):
    """Answer size queries from lfs pointer metadata when possible."""
    if not _islfs(self, rev=rev):
        return orig(self, rev)
    # fast path: the pointer records the blob size; no need to fetch the blob
    metadata = pointer.deserialize(self.revision(rev, raw=True))
    return int(metadata['size'])
|
|
|
|
|
2017-05-17 01:38:51 +03:00
|
|
|
def filectxcmp(orig, self, fctx):
    """returns True if text is different than fctx"""
    # some fctx (ex. hg-git) is not based on basefilectx and do not have islfs
    otherislfs = getattr(fctx, 'islfs', lambda: False)
    if self.islfs() and otherislfs():
        # fast path: compare LFS oids instead of file contents
        ours = pointer.deserialize(self.rawdata())
        theirs = pointer.deserialize(fctx.rawdata())
        return ours.oid() != theirs.oid()
    return orig(self, fctx)
|
|
|
|
|
2017-05-04 19:18:04 +03:00
|
|
|
def filectxisbinary(orig, self):
    """Answer isbinary queries from lfs pointer metadata when possible."""
    if not self.islfs():
        return orig(self)
    metadata = pointer.deserialize(self.rawdata())
    # if lfs metadata says nothing, assume it's binary by default
    return bool(int(metadata.get('x-is-binary', 1)))
|
|
|
|
|
2017-05-17 01:41:39 +03:00
|
|
|
def filectxislfs(self):
    """Return True if this file context is backed by an lfs blob."""
    flog = self.filelog()
    return _islfs(flog, self.filenode())
|
|
|
|
|
2017-04-10 21:11:37 +03:00
|
|
|
def vfsinit(orig, self, othervfs):
    """Propagate lfs options and blobstore handles from othervfs to self."""
    orig(self, othervfs)
    # copy lfs related options
    for key, value in othervfs.options.items():
        if key.startswith('lfs'):
            self.options[key] = value
    # also copy lfs blobstores. note: this can run before reposetup, so lfs
    # blobstore attributes are not always ready at this time.
    for name in ('lfslocalblobstore', 'lfsremoteblobstore'):
        if util.safehasattr(othervfs, name):
            setattr(self, name, getattr(othervfs, name))
|
|
|
|
|
2017-05-19 01:10:39 +03:00
|
|
|
def _canskipupload(repo):
    """Return True when uploading blobs would be a no-op.

    If the configured remote store is the null store, there is nowhere to
    upload to, so the upload step can be skipped entirely.
    """
    remote = repo.svfs.lfsremoteblobstore
    return isinstance(remote, blobstore._nullremote)
|
|
|
|
|
2017-03-25 05:00:30 +03:00
|
|
|
def prepush(pushop):
    """Prepush hook.

    Read through the revisions to push, looking for filelog entries that can be
    deserialized into metadata so that we can block the push on their upload to
    the remote blobstore.
    """
    repo = pushop.repo
    if _canskipupload(repo):
        return
    uploadblobs(repo, extractpointers(repo, pushop.outgoing.missing))
|
2017-03-25 05:00:30 +03:00
|
|
|
|
2017-05-17 01:49:32 +03:00
|
|
|
def writenewbundle(orig, ui, repo, source, filename, bundletype, outgoing,
                   *args, **kwargs):
    """upload LFS blobs added by outgoing revisions on 'hg bundle'

    BUG FIX: the previous version returned early (without calling orig) when
    the upload could be skipped, so 'hg bundle' silently produced no bundle
    at all when the remote blobstore was a null store. Only the upload is
    optional - the bundle itself must always be written.
    """
    if not _canskipupload(repo):
        pointers = extractpointers(repo, outgoing.missing)
        uploadblobs(repo, pointers)
    return orig(ui, repo, source, filename, bundletype, outgoing, *args,
                **kwargs)
|
|
|
|
|
2017-05-08 21:20:50 +03:00
|
|
|
def extractpointers(repo, revs):
    """return a list of lfs pointers added by given revs

    Raises error.Abort if a revision carries the EXTSTORED flag but its raw
    text does not deserialize into a valid pointer.
    """
    ui = repo.ui
    if ui.debugflag:
        ui.write(_('lfs: computing set of blobs to upload\n'))
    # keyed by oid so each blob is uploaded once even if referenced by
    # multiple file revisions
    pointers = {}
    for i, n in enumerate(revs):
        ctx = repo[n]
        files = set(ctx.files())
        for f in files:
            if f not in ctx:
                continue
            fctx = ctx[f]
            if not _islfs(fctx.filelog(), fctx.filenode()):
                continue
            try:
                metadata = pointer.deserialize(fctx.rawdata())
                pointers[metadata['oid']] = metadata
            except pointer.InvalidPointer as ex:
                # no trailing newline: Abort renders its own; the previous
                # '\n' suffix produced a blank line in the abort output
                raise error.Abort(_('lfs: corrupted pointer (%s@%s): %s')
                                  % (f, short(ctx.node()), ex))
    return pointers.values()
|
2017-03-25 05:00:30 +03:00
|
|
|
|
2017-05-12 03:57:42 +03:00
|
|
|
def uploadblobs(repo, pointers):
    """upload given pointers from local blobstore"""
    if not pointers:
        return

    remote = repo.svfs.lfsremoteblobstore
    local = repo.svfs.lfslocalblobstore
    remote.writebatch(pointers, local)
|