mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 09:17:30 +03:00
lfs: rewrite pointer logic
Summary: Since we decided to only support GitHub's Git LFS specification, there is no need to support multiple pointer types, so the code can be simplified. The old code special-cases keys like `version`, `oid`, and `hashalgo`, which makes it longer than necessary. The new code is a rewrite that treats everything as a normal dict entry, so the pointer class is much shorter: 76 -> 21 lines. Data validation is temporarily lost; it will be added back (and made stronger) by the next diff. That change is kept separate to make review easier. Test Plan: Run existing tests Reviewers: #mercurial, davidsp, rmcelroy Reviewed By: rmcelroy Subscribers: rmcelroy Differential Revision: https://phabricator.intern.facebook.com/D5043547 Signature: t1:5043547:1494543031:ac1100939a10a79dfd749ab6ac9c3bb7fcd84dbf
This commit is contained in:
parent
a94a1bd72a
commit
143b8c67d9
@ -15,91 +15,26 @@ class PointerDeserializationError(error.RevlogError):
|
||||
message = _('invalid lfs pointer format detected')
|
||||
super(PointerDeserializationError, self).__init__(message)
|
||||
|
||||
class BasePointer(object):
|
||||
class GithubPointer(dict):
|
||||
VERSION = 'https://git-lfs.github.com/spec/v1'
|
||||
|
||||
def __init__(self, extrameta=None):
|
||||
self.__metadata = dict()
|
||||
if extrameta:
|
||||
self.__metadata.update(extrameta)
|
||||
def __init__(self, *args, **kwargs):
|
||||
self['version'] = self.VERSION
|
||||
super(GithubPointer, self).__init__(*args, **kwargs)
|
||||
|
||||
def __str__(self):
|
||||
return self.serialize()
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self.__metadata.get(key)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self.__metadata[key] = value
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.__metadata
|
||||
|
||||
def _transformkv(self, key, value):
|
||||
return '%s %s\n' % (key, value)
|
||||
|
||||
def keys(self):
|
||||
return self.__metadata.keys()
|
||||
@classmethod
|
||||
def deserialize(cls, text):
|
||||
try:
|
||||
return cls(l.split(' ', 1) for l in text.splitlines())
|
||||
except ValueError: # l.split returns 1 item instead of 2
|
||||
raise PointerDeserializationError()
|
||||
|
||||
def serialize(self):
|
||||
matcher = re.compile('[a-z0-9\-\.]+')
|
||||
text = 'version ' + self.VERSION
|
||||
keys = sorted(self.__metadata.keys())
|
||||
for key in keys:
|
||||
if key == 'version':
|
||||
continue
|
||||
assert matcher.match(key)
|
||||
text = text + self._transformkv(key, self.__metadata[key])
|
||||
return text
|
||||
|
||||
class GithubPointer(BasePointer):
|
||||
|
||||
VERSION = 'https://git-lfs.github.com/spec/v1\n'
|
||||
|
||||
def __init__(self, oid, hashalgo, size, extrameta=None):
|
||||
super(GithubPointer, self).__init__(extrameta)
|
||||
self['oid'] = oid
|
||||
self['hashalgo'] = hashalgo
|
||||
self['size'] = size
|
||||
|
||||
def _transformkv(self, key, value):
|
||||
if key == 'hashalgo':
|
||||
return ''
|
||||
elif key == 'oid':
|
||||
return 'oid %s:%s\n' % (self['hashalgo'], value)
|
||||
return '%s %s\n' % (key, value)
|
||||
|
||||
@staticmethod
|
||||
def deserialize(text):
|
||||
metadata = dict()
|
||||
for line in text.splitlines()[1:]:
|
||||
if len(line) == 0:
|
||||
continue
|
||||
key, value = line.split(' ', 1)
|
||||
if key == 'oid':
|
||||
hashalgo, oid = value.split(':', 1)
|
||||
metadata['oid'] = str(oid)
|
||||
metadata['hashalgo'] = hashalgo
|
||||
else:
|
||||
metadata[key] = value
|
||||
assert 'oid' in metadata
|
||||
assert 'size' in metadata
|
||||
return GithubPointer(
|
||||
oid=metadata['oid'],
|
||||
hashalgo=metadata['hashalgo'],
|
||||
size=metadata['size'],
|
||||
extrameta=metadata)
|
||||
sortkeyfunc = lambda x: (x[0] != 'version', x)
|
||||
items = sorted(self.iteritems(), key=sortkeyfunc)
|
||||
return ''.join('%s %s\n' % (k, v) for k, v in items)
|
||||
|
||||
def tostoreid(self):
|
||||
return StoreID(self['oid'], self['size'])
|
||||
return StoreID(self['oid'].split(':')[-1], self['size'])
|
||||
|
||||
def deserialize(text):
|
||||
pointerformats = [
|
||||
GithubPointer,
|
||||
]
|
||||
|
||||
for cls in pointerformats:
|
||||
if text.startswith('version %s' % cls.VERSION):
|
||||
obj = cls.deserialize(text)
|
||||
return obj
|
||||
|
||||
raise PointerDeserializationError()
|
||||
deserialize = GithubPointer.deserialize
|
||||
|
@ -40,14 +40,11 @@ def readfromstore(self, text):
|
||||
contents of the blobstore should be checked using checkhash.
|
||||
"""
|
||||
metadata = pointer.deserialize(text)
|
||||
|
||||
verifyhash = True
|
||||
storeids = [metadata.tostoreid()]
|
||||
storeid = metadata.tostoreid()
|
||||
store = self.opener.lfslocalblobstore
|
||||
missing = filter(lambda id: not store.has(id), storeids)
|
||||
if missing:
|
||||
self.opener.lfsremoteblobstore.readbatch(missing, store)
|
||||
text = ''.join([store.read(id) for id in storeids])
|
||||
if not store.has(storeid):
|
||||
self.opener.lfsremoteblobstore.readbatch([storeid], store)
|
||||
text = store.read(storeid)
|
||||
|
||||
# pack hg filelog metadata
|
||||
hgmeta = {}
|
||||
@ -58,7 +55,7 @@ def readfromstore(self, text):
|
||||
if hgmeta or text.startswith('\1\n'):
|
||||
text = filelog.packmeta(hgmeta, text)
|
||||
|
||||
return (text, verifyhash)
|
||||
return (text, True)
|
||||
|
||||
def writetostore(self, text):
|
||||
# hg filelog metadata (includes rename, etc)
|
||||
@ -75,7 +72,8 @@ def writetostore(self, text):
|
||||
|
||||
# replace contents with metadata
|
||||
hashalgo = 'sha256'
|
||||
metadata = pointer.GithubPointer(storeid.oid, hashalgo, storeid.size)
|
||||
oid = '%s:%s' % (hashalgo, storeid.oid)
|
||||
metadata = pointer.GithubPointer(oid=oid, size=str(storeid.size))
|
||||
|
||||
# by default, we expect the content to be binary. however, LFS could also
|
||||
# be used for non-binary content. add a special entry for non-binary data.
|
||||
@ -88,9 +86,9 @@ def writetostore(self, text):
|
||||
if hgmeta is not None:
|
||||
for k, v in hgmeta.iteritems():
|
||||
metadata['x-hg-%s' % k] = v
|
||||
text = str(metadata)
|
||||
|
||||
return (text, False)
|
||||
rawtext = metadata.serialize()
|
||||
return (rawtext, False)
|
||||
|
||||
def _islfs(rlog, node=None, rev=None):
|
||||
if rev is None:
|
||||
@ -149,7 +147,7 @@ def filectxisbinary(orig, self):
|
||||
rawtext = flog.revision(node, raw=True)
|
||||
metadata = pointer.deserialize(rawtext)
|
||||
# if lfs metadata says nothing, assume it's binary by default
|
||||
return bool(int(metadata['x-is-binary'] or 1))
|
||||
return bool(int(metadata.get('x-is-binary', 1)))
|
||||
return orig(self)
|
||||
|
||||
def vfsinit(orig, self, othervfs):
|
||||
|
@ -347,9 +347,9 @@ class FileImporter(object):
|
||||
if lfsext and lfsext.wrapper._islfs(hgfilelog, node):
|
||||
lfspointer = lfsext.pointer.deserialize(
|
||||
hgfilelog.revision(node, raw=True))
|
||||
largefiles.append((c.cl, self.depotfile, lfspointer['oid']))
|
||||
self._ui.debug('largefile: %s, oid: %s\n' % (
|
||||
self.relpath, lfspointer['oid']))
|
||||
oid = lfspointer.tostoreid().oid
|
||||
largefiles.append((c.cl, self.depotfile, oid))
|
||||
self._ui.debug('largefile: %s, oid: %s\n' % (self.relpath, oid))
|
||||
|
||||
newlen = len(hgfilelog)
|
||||
return fileflags, largefiles, origlen, newlen
|
||||
|
Loading…
Reference in New Issue
Block a user