lfs: rewrite pointer logic

Summary:
Since we decided to support only GitHub's Git LFS specification, there is no
need to support multiple pointer types, so the code can be simplified.

The old code special-cases keys like `version`, `oid`, and `hashalgo`, which
makes it longer than necessary. The new code treats everything as a normal
dict entry, so the pointer class is much shorter: 76 -> 21 lines.
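
For illustration, a serialized pointer is just sorted `key value` lines with
`version` forced to the front, per GitHub's spec (the oid and size values
below are made up):

    version https://git-lfs.github.com/spec/v1
    oid sha256:abc123
    size 12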

Data validation is temporarily lost; it will be added back (and made
stronger) in the next diff. It is split out to make review easier.


Test Plan: Run existing tests

Reviewers: #mercurial, davidsp, rmcelroy

Reviewed By: rmcelroy

Subscribers: rmcelroy

Differential Revision: https://phabricator.intern.facebook.com/D5043547

Signature: t1:5043547:1494543031:ac1100939a10a79dfd749ab6ac9c3bb7fcd84dbf
Jun Wu 2017-05-11 17:48:21 -07:00
parent a94a1bd72a
commit 143b8c67d9
3 changed files with 29 additions and 96 deletions


@@ -15,91 +15,26 @@ class PointerDeserializationError(error.RevlogError):
         message = _('invalid lfs pointer format detected')
         super(PointerDeserializationError, self).__init__(message)
 
-class BasePointer(object):
+class GithubPointer(dict):
+    VERSION = 'https://git-lfs.github.com/spec/v1'
 
-    def __init__(self, extrameta=None):
-        self.__metadata = dict()
-        if extrameta:
-            self.__metadata.update(extrameta)
+    def __init__(self, *args, **kwargs):
+        self['version'] = self.VERSION
+        super(GithubPointer, self).__init__(*args, **kwargs)
 
     def __str__(self):
         return self.serialize()
 
-    def __getitem__(self, key):
-        return self.__metadata.get(key)
-
-    def __setitem__(self, key, value):
-        self.__metadata[key] = value
-
-    def __contains__(self, key):
-        return key in self.__metadata
-
-    def _transformkv(self, key, value):
-        return '%s %s\n' % (key, value)
-
-    def keys(self):
-        return self.__metadata.keys()
+    @classmethod
+    def deserialize(cls, text):
+        try:
+            return cls(l.split(' ', 1) for l in text.splitlines())
+        except ValueError: # l.split returns 1 item instead of 2
+            raise PointerDeserializationError()
 
     def serialize(self):
-        matcher = re.compile('[a-z0-9\-\.]+')
-        text = 'version ' + self.VERSION
-        keys = sorted(self.__metadata.keys())
-        for key in keys:
-            if key == 'version':
-                continue
-            assert matcher.match(key)
-            text = text + self._transformkv(key, self.__metadata[key])
-        return text
-
-class GithubPointer(BasePointer):
-    VERSION = 'https://git-lfs.github.com/spec/v1\n'
-
-    def __init__(self, oid, hashalgo, size, extrameta=None):
-        super(GithubPointer, self).__init__(extrameta)
-        self['oid'] = oid
-        self['hashalgo'] = hashalgo
-        self['size'] = size
-
-    def _transformkv(self, key, value):
-        if key == 'hashalgo':
-            return ''
-        elif key == 'oid':
-            return 'oid %s:%s\n' % (self['hashalgo'], value)
-        return '%s %s\n' % (key, value)
-
-    @staticmethod
-    def deserialize(text):
-        metadata = dict()
-        for line in text.splitlines()[1:]:
-            if len(line) == 0:
-                continue
-            key, value = line.split(' ', 1)
-            if key == 'oid':
-                hashalgo, oid = value.split(':', 1)
-                metadata['oid'] = str(oid)
-                metadata['hashalgo'] = hashalgo
-            else:
-                metadata[key] = value
-
-        assert 'oid' in metadata
-        assert 'size' in metadata
-
-        return GithubPointer(
-            oid=metadata['oid'],
-            hashalgo=metadata['hashalgo'],
-            size=metadata['size'],
-            extrameta=metadata)
+        sortkeyfunc = lambda x: (x[0] != 'version', x)
+        items = sorted(self.iteritems(), key=sortkeyfunc)
+        return ''.join('%s %s\n' % (k, v) for k, v in items)
 
     def tostoreid(self):
-        return StoreID(self['oid'], self['size'])
+        return StoreID(self['oid'].split(':')[-1], self['size'])
 
-def deserialize(text):
-    pointerformats = [
-        GithubPointer,
-    ]
-
-    for cls in pointerformats:
-        if text.startswith('version %s' % cls.VERSION):
-            obj = cls.deserialize(text)
-            return obj
-
-    raise PointerDeserializationError()
+deserialize = GithubPointer.deserialize
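
A quick sketch of how the new class behaves (a hypothetical Python 2 session
against the code above; the oid and size values are made up):

    >>> p = GithubPointer(oid='sha256:abc123', size='12')
    >>> p.serialize()
    'version https://git-lfs.github.com/spec/v1\noid sha256:abc123\nsize 12\n'
    >>> GithubPointer.deserialize(p.serialize()) == p
    True
    >>> p.tostoreid().oid  # the 'hashalgo:' prefix is stripped for the blob store
    'abc123'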


@@ -40,14 +40,11 @@ def readfromstore(self, text):
     contents of the blobstore should be checked using checkhash.
     """
     metadata = pointer.deserialize(text)
-    verifyhash = True
-    storeids = [metadata.tostoreid()]
+    storeid = metadata.tostoreid()
     store = self.opener.lfslocalblobstore
-
-    missing = filter(lambda id: not store.has(id), storeids)
-    if missing:
-        self.opener.lfsremoteblobstore.readbatch(missing, store)
-    text = ''.join([store.read(id) for id in storeids])
+    if not store.has(storeid):
+        self.opener.lfsremoteblobstore.readbatch([storeid], store)
+    text = store.read(storeid)
 
     # pack hg filelog metadata
     hgmeta = {}
@@ -58,7 +55,7 @@ def readfromstore(self, text):
     if hgmeta or text.startswith('\1\n'):
         text = filelog.packmeta(hgmeta, text)
 
-    return (text, verifyhash)
+    return (text, True)
 
 def writetostore(self, text):
     # hg filelog metadata (includes rename, etc)
@@ -75,7 +72,8 @@ def writetostore(self, text):
 
     # replace contents with metadata
     hashalgo = 'sha256'
-    metadata = pointer.GithubPointer(storeid.oid, hashalgo, storeid.size)
+    oid = '%s:%s' % (hashalgo, storeid.oid)
+    metadata = pointer.GithubPointer(oid=oid, size=str(storeid.size))
 
     # by default, we expect the content to be binary. however, LFS could also
     # be used for non-binary content. add a special entry for non-binary data.
@@ -88,9 +86,9 @@ def writetostore(self, text):
     if hgmeta is not None:
         for k, v in hgmeta.iteritems():
             metadata['x-hg-%s' % k] = v
 
-    text = str(metadata)
-    return (text, False)
+    rawtext = metadata.serialize()
+    return (rawtext, False)
 
 def _islfs(rlog, node=None, rev=None):
     if rev is None:
@@ -149,7 +147,7 @@ def filectxisbinary(orig, self):
         rawtext = flog.revision(node, raw=True)
         metadata = pointer.deserialize(rawtext)
         # if lfs metadata says nothing, assume it's binary by default
-        return bool(int(metadata['x-is-binary'] or 1))
+        return bool(int(metadata.get('x-is-binary', 1)))
     return orig(self)
 
 def vfsinit(orig, self, othervfs):
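
With this change, the rawtext stored in the filelog is just the serialized
pointer. For example, for a renamed non-binary file it would look something
like the following (illustrative values; x-hg-* entries carry hg filelog
metadata, x-is-binary carries the binary flag):

    version https://git-lfs.github.com/spec/v1
    oid sha256:abc123
    size 42
    x-hg-copy dir/oldname.txt
    x-hg-copyrev 1111111111111111111111111111111111111111
    x-is-binary 0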


@@ -347,9 +347,9 @@ class FileImporter(object):
             if lfsext and lfsext.wrapper._islfs(hgfilelog, node):
                 lfspointer = lfsext.pointer.deserialize(
                     hgfilelog.revision(node, raw=True))
-                largefiles.append((c.cl, self.depotfile, lfspointer['oid']))
-                self._ui.debug('largefile: %s, oid: %s\n' % (
-                    self.relpath, lfspointer['oid']))
+                oid = lfspointer.tostoreid().oid
+                largefiles.append((c.cl, self.depotfile, oid))
+                self._ui.debug('largefile: %s, oid: %s\n' % (self.relpath, oid))
 
         newlen = len(hgfilelog)
         return fileflags, largefiles, origlen, newlen