From 8fcd86af16fedd8f25900a4d05d858c48dbfdc71 Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Wed, 26 Apr 2017 13:34:15 -0700 Subject: [PATCH] remotefilelog: move constants to class to prepare index format change Summary: To be able to bump version and change formats, the related constants need to be moved to individual classes. So a class (ex. datapack) can be subclassed to handle different formats. Test Plan: `arc unit` Reviewers: #mercurial, durham Reviewed By: durham Subscribers: mjpieters Differential Revision: https://phabricator.intern.facebook.com/D4927284 Signature: t1:4927284:1493152641:e3274dd735d50baf193b7615dd314f4e6cf161f0 --- remotefilelog/basepack.py | 12 +++++++----- remotefilelog/datapack.py | 36 +++++++++++++++++++++--------------- remotefilelog/historypack.py | 26 +++++++++++++++++++------- 3 files changed, 47 insertions(+), 27 deletions(-) diff --git a/remotefilelog/basepack.py b/remotefilelog/basepack.py index 90c2405cff..9f6886df21 100644 --- a/remotefilelog/basepack.py +++ b/remotefilelog/basepack.py @@ -8,7 +8,6 @@ import shallowutil # The pack version supported by this implementation. This will need to be # rev'd whenever the byte format changes. Ex: changing the fanout prefix, # changing any of the int sizes, changing the delta algorithm, etc. 
-VERSION = 0 PACKVERSIONSIZE = 1 INDEXVERSIONSIZE = 2 @@ -134,6 +133,7 @@ class basepack(object): # The maximum amount we should read via mmap before remmaping so the old # pages can be released (100MB) MAXPAGEDIN = 100 * 1024**2 + VERSION = 0 def __init__(self, path): self.path = path @@ -151,13 +151,13 @@ class basepack(object): self.freememory() # initialize the mmap version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0] - if version != VERSION: + if version != self.VERSION: raise RuntimeError("unsupported pack version '%s'" % version) version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE]) - if version != VERSION: + if version != self.VERSION: raise RuntimeError("unsupported pack index version '%s'" % version) @@ -210,6 +210,8 @@ class basepack(object): raise NotImplemented() class mutablebasepack(object): + VERSION = 0 + def __init__(self, ui, packdir): opener = vfsmod.vfs(packdir) opener.createmode = 0o444 @@ -236,7 +238,7 @@ class mutablebasepack(object): # Write header # TODO: make it extensible (ex: allow specifying compression algorithm, # a flexible key/value header, delta algorithm, fanout size, etc) - version = struct.pack('!B', VERSION) # unsigned 1 byte int + version = struct.pack('!B', self.VERSION) # unsigned 1 byte int self.writeraw(version) def __enter__(self): @@ -334,7 +336,7 @@ class mutablebasepack(object): config = 0 if indexparams.fanoutprefix == LARGEFANOUTPREFIX: config = 0b10000000 - self.idxfp.write(struct.pack('!BB', VERSION, config)) + self.idxfp.write(struct.pack('!BB', self.VERSION, config)) class indexparams(object): __slots__ = ('fanoutprefix', 'fanoutstruct', 'fanoutcount', 'fanoutsize', diff --git a/remotefilelog/datapack.py b/remotefilelog/datapack.py index 250863837d..579eb707d6 100644 --- a/remotefilelog/datapack.py +++ b/remotefilelog/datapack.py @@ -10,10 +10,6 @@ try: except ImportError: cstore = None -# Index entry format is: <node><delta offset><pack data offset><pack data size> -# See the mutabledatapack doccomment for more details. 
-INDEXFORMAT = '!20siQQ' -INDEXENTRYLENGTH = 40 NODELENGTH = 20 # The indicator value in the index for a fulltext entry. @@ -62,6 +58,11 @@ class datapack(basepack.basepack): INDEXSUFFIX = INDEXSUFFIX PACKSUFFIX = PACKSUFFIX + # Format is <node><delta offset><pack data offset><pack data size> + # See the mutabledatapack doccomment for more details. + INDEXFORMAT = '!20siQQ' + INDEXENTRYLENGTH = 40 + def getmissing(self, keys): missing = [] for name, node in keys: @@ -85,11 +86,12 @@ class datapack(basepack.basepack): # Precompute chains chain = [value] deltabaseoffset = value[1] + entrylen = self.INDEXENTRYLENGTH while (deltabaseoffset != FULLTEXTINDEXMARK and deltabaseoffset != NOBASEINDEXMARK): loc = params.indexstart + deltabaseoffset - value = struct.unpack(INDEXFORMAT, self._index[loc:loc + - INDEXENTRYLENGTH]) + value = struct.unpack(self.INDEXFORMAT, + self._index[loc:loc + entrylen]) deltabaseoffset = value[1] chain.append(value) @@ -148,17 +150,18 @@ class datapack(basepack.basepack): index = self._index startnode = index[start:start + NODELENGTH] endnode = index[end:end + NODELENGTH] + entrylen = self.INDEXENTRYLENGTH if startnode == node: - entry = index[start:start + INDEXENTRYLENGTH] + entry = index[start:start + entrylen] elif endnode == node: - entry = index[end:end + INDEXENTRYLENGTH] + entry = index[end:end + entrylen] else: - while start < end - INDEXENTRYLENGTH: + while start < end - entrylen: mid = start + (end - start) / 2 - mid = mid - ((mid - params.indexstart) % INDEXENTRYLENGTH) + mid = mid - ((mid - params.indexstart) % entrylen) midnode = index[mid:mid + NODELENGTH] if midnode == node: - entry = index[mid:mid + INDEXENTRYLENGTH] + entry = index[mid:mid + entrylen] break if node > midnode: start = mid @@ -169,7 +172,7 @@ class datapack(basepack.basepack): else: return None - return struct.unpack(INDEXFORMAT, entry) + return struct.unpack(self.INDEXFORMAT, entry) def markledger(self, ledger): for filename, node in self: @@ -349,7 +352,10 @@ class mutabledatapack(basepack.mutablebasepack): """ 
INDEXSUFFIX = INDEXSUFFIX PACKSUFFIX = PACKSUFFIX - INDEXENTRYLENGTH = INDEXENTRYLENGTH + + # v0 index format: <node><delta offset><pack data offset><pack data size> + INDEXFORMAT = datapack.INDEXFORMAT + INDEXENTRYLENGTH = datapack.INDEXENTRYLENGTH def add(self, name, node, deltabasenode, delta): if len(name) > 2**16: @@ -384,6 +390,7 @@ class mutabledatapack(basepack.mutablebasepack): in self.entries.iteritems()) rawindex = '' + fmt = self.INDEXFORMAT for node, deltabase, offset, size in entries: if deltabase == nullid: deltabaselocation = FULLTEXTINDEXMARK @@ -393,8 +400,7 @@ class mutabledatapack(basepack.mutablebasepack): deltabaselocation = nodelocations.get(deltabase, NOBASEINDEXMARK) - entry = struct.pack(INDEXFORMAT, node, deltabaselocation, offset, - size) + entry = struct.pack(fmt, node, deltabaselocation, offset, size) rawindex += entry return rawindex diff --git a/remotefilelog/historypack.py b/remotefilelog/historypack.py index c6258e8f74..bfc7b7c137 100644 --- a/remotefilelog/historypack.py +++ b/remotefilelog/historypack.py @@ -53,6 +53,11 @@ class historypack(basepack.basepack): INDEXSUFFIX = INDEXSUFFIX PACKSUFFIX = PACKSUFFIX + INDEXFORMAT = INDEXFORMAT + INDEXENTRYLENGTH = INDEXENTRYLENGTH + + VERSION = 0 + def getmissing(self, keys): missing = [] for name, node in keys: @@ -167,17 +172,19 @@ class historypack(basepack.basepack): # Bisect between start and end to find node startnode = self._index[start:start + NODELENGTH] endnode = self._index[end:end + NODELENGTH] + entrylen = self.INDEXENTRYLENGTH + if startnode == namehash: - entry = self._index[start:start + INDEXENTRYLENGTH] + entry = self._index[start:start + entrylen] elif endnode == namehash: - entry = self._index[end:end + INDEXENTRYLENGTH] + entry = self._index[end:end + entrylen] else: - while start < end - INDEXENTRYLENGTH: + while start < end - entrylen: mid = start + (end - start) / 2 - mid = mid - ((mid - params.indexstart) % INDEXENTRYLENGTH) + mid = mid - ((mid - params.indexstart) % entrylen) midnode = self._index[mid:mid + 
NODELENGTH] if midnode == namehash: - entry = self._index[mid:mid + INDEXENTRYLENGTH] + entry = self._index[mid:mid + entrylen] break if namehash > midnode: start = mid @@ -188,7 +195,7 @@ class historypack(basepack.basepack): else: raise KeyError(name) - filenamehash, offset, size = struct.unpack(INDEXFORMAT, entry) + filenamehash, offset, size = struct.unpack(self.INDEXFORMAT, entry) filenamelength = struct.unpack('!H', self._data[offset:offset + constants.FILENAMESIZE])[0] offset += constants.FILENAMESIZE @@ -318,8 +325,12 @@ class mutablehistorypack(basepack.mutablebasepack): """ INDEXSUFFIX = INDEXSUFFIX PACKSUFFIX = PACKSUFFIX + + INDEXFORMAT = INDEXFORMAT INDEXENTRYLENGTH = INDEXENTRYLENGTH + VERSION = 0 + def __init__(self, ui, packpath): super(mutablehistorypack, self).__init__(ui, packpath) self.pastfiles = {} @@ -379,7 +390,8 @@ class mutablehistorypack(basepack.mutablebasepack): files = sorted(files) rawindex = "" + fmt = self.INDEXFORMAT for namehash, offset, size in files: - rawindex += struct.pack(INDEXFORMAT, namehash, offset, size) + rawindex += struct.pack(fmt, namehash, offset, size) return rawindex