mirror of
https://github.com/facebook/sapling.git
synced 2024-10-12 01:39:21 +03:00
remotefilelog: remove python mutabledatapack code
Summary: Goodbye. Reviewed By: kulshrax Differential Revision: D16392290 fbshipit-source-id: 541b945e274f097b722b5e79ca03e104c4cfd6a0
This commit is contained in:
parent
c6dccb4955
commit
e86e388661
@ -89,50 +89,6 @@ class datapackstore(basepack.basepackstore):
|
||||
revisionstore.repackincrementaldatapacks(self.path, self.path)
|
||||
|
||||
|
||||
class datapack(object):
    """Read-only accessor constants for an on-disk datapack/index pair."""

    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # Each index entry is laid out as:
    #   <node><delta offset><pack data offset><pack data size>
    # See the mutabledatapack doccomment for more details.
    INDEXFORMAT = "!20siQQ"
    INDEXENTRYLENGTH = 40
def _readdataentry(rawentry, version, getmeta=False):
    """Parse one raw datapack entry.

    Entry layout (network byte order):
        <2 byte filename len><filename>
        <20 byte node><20 byte deltabase node>
        <8 byte delta len><lz4 delta>
        [<4 byte metadata len><metadata>]   # version >= 1, read iff getmeta

    Returns (filename, node, deltabasenode, delta), plus a trailing meta
    dict when getmeta is True.
    """
    # <2 byte len> + <filename>
    (filenamelen,) = struct.unpack("!H", rawentry[:2])
    offset = 2
    filename = rawentry[offset : offset + filenamelen]
    offset += filenamelen

    # <20 byte node> + <20 byte deltabase>
    node = rawentry[offset : offset + NODELENGTH]
    offset += NODELENGTH
    deltabasenode = rawentry[offset : offset + NODELENGTH]
    offset += NODELENGTH

    # <8 byte len> + <delta>
    (deltalen,) = struct.unpack("!Q", rawentry[offset : offset + 8])
    offset += 8
    delta = lz4decompress(rawentry[offset : offset + deltalen])

    if not getmeta:
        return filename, node, deltabasenode, delta

    if version == 0:
        # v0 packs carry no metadata section.
        meta = {}
    else:
        metastart = offset + deltalen
        metalen = struct.unpack_from("!I", rawentry, metastart)[0]
        rawmeta = rawentry[metastart + 4 : metastart + 4 + metalen]
        meta = shallowutil.parsepackmeta(rawmeta)
    return filename, node, deltabasenode, delta, meta
class fastdatapack(basepack.basepack):
|
||||
INDEXSUFFIX = INDEXSUFFIX
|
||||
PACKSUFFIX = PACKSUFFIX
|
||||
@ -205,200 +161,6 @@ class fastdatapack(basepack.basepack):
|
||||
return self.datapack.iterentries()
|
||||
|
||||
|
||||
class mutabledatapack(basepack.mutablebasepack):
    """A class for constructing and serializing a datapack file and index.

    A datapack is a pair of files that contain the revision contents for various
    file revisions in Mercurial. It contains only revision contents (like file
    contents), not any history information.

    It consists of two files, with the following format. All bytes are in
    network byte order (big endian).

    .datapack
        The pack itself is a series of revision deltas with some basic header
        information on each. A revision delta may be a fulltext, represented by
        a deltabasenode equal to the nullid.

        datapack = <version: 1 byte>
                   [<revision>,...]
        revision = <filename len: 2 byte unsigned int>
                   <filename>
                   <node: 20 byte>
                   <deltabasenode: 20 byte>
                   <delta len: 8 byte unsigned int>
                   <delta>
                   <metadata-list len: 4 byte unsigned int> [1]
                   <metadata-list> [1]
        metadata-list = [<metadata-item>, ...]
        metadata-item = <metadata-key: 1 byte>
                        <metadata-value len: 2 byte unsigned>
                        <metadata-value>

        metadata-key could be METAKEYFLAG or METAKEYSIZE or other single byte
        value in the future.

    .dataidx
        The index file consists of two parts, the fanout and the index.

        The index is a list of index entries, sorted by node (one per revision
        in the pack). Each entry has:

        - node (The 20 byte node of the entry; i.e. the commit hash, file node
                hash, etc)
        - deltabase index offset (The location in the index of the deltabase
                                  for this entry. The deltabase is the next
                                  delta in the chain, with the chain eventually
                                  terminating in a full-text, represented by a
                                  deltabase offset of -1. This lets us compute
                                  delta chains from the index, then do
                                  sequential reads from the pack if the
                                  revisions are nearby on disk.)
        - pack entry offset (The location of this entry in the datapack)
        - pack content size (The on-disk length of this entry's pack data)

        The fanout is a quick lookup table to reduce the number of steps for
        bisecting the index. It is a series of 4 byte pointers to positions
        within the index. It has 2^16 entries, which corresponds to hash
        prefixes [0000, 0001,..., FFFE, FFFF]. Example: the pointer in slot
        4F0A points to the index position of the first revision whose node
        starts with 4F0A. This saves log(2^16)=16 bisect steps.

        dataidx = <fanouttable>
                  <index>
        fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
        index = [<index entry>,...]
        indexentry = <node: 20 byte>
                     <deltabase location: 4 byte signed int>
                     <pack entry offset: 8 byte unsigned int>
                     <pack entry size: 8 byte unsigned int>

    [1]: new in version 1.
    """

    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # v[01] index format: <node><delta offset><pack data offset><pack data size>
    INDEXFORMAT = datapack.INDEXFORMAT
    INDEXENTRYLENGTH = datapack.INDEXENTRYLENGTH

    # v1 has metadata support
    SUPPORTED_VERSIONS = [0, 1]

    def add(self, name, node, deltabasenode, delta, metadata=None):
        """Append one compressed delta entry to the pack.

        metadata is a dict, ex. {METAKEYFLAG: flag}. Raises RuntimeError on
        an over-long name or malformed node; ProgrammingError when flags are
        passed to a v0 pack.
        """
        # The filename length is stored as an unsigned 2 byte int, so the
        # maximum representable length is 2**16 - 1. The previous ">" check
        # let a name of exactly 2**16 bytes fall through to struct.pack and
        # die with a raw struct.error instead of this explicit error.
        if len(name) >= 2 ** 16:
            raise RuntimeError(_("name too long %s") % name)
        if len(node) != 20:
            raise RuntimeError(_("node should be 20 bytes %s") % node)

        if node in self.entries:
            # The revision has already been added
            return

        # TODO: allow configurable compression
        delta = lz4compress(delta)

        rawdata = "%s%s%s%s%s%s" % (
            struct.pack("!H", len(name)),  # unsigned 2 byte int
            name,
            node,
            deltabasenode,
            struct.pack("!Q", len(delta)),  # unsigned 8 byte int
            delta,
        )

        if self.VERSION == 1:
            # v1 supports metadata
            rawmeta = shallowutil.buildpackmeta(metadata)
            rawdata += struct.pack("!I", len(rawmeta))  # unsigned 4 byte
            rawdata += rawmeta
        else:
            # v0 cannot store metadata, raise if metadata contains flag
            if metadata and metadata.get(constants.METAKEYFLAG, 0) != 0:
                raise error.ProgrammingError("v0 pack cannot store flags")

        offset = self._datalen
        size = len(rawdata)

        self.entries[node] = (deltabasenode, offset, size)

        self.writeraw(rawdata)

    def createindex(self, nodelocations, indexoffset):
        """Serialize the in-memory entry table into the on-disk index body.

        Entries are emitted sorted by node, matching the bisect-friendly
        layout described in the class docstring.
        """
        entries = sorted((n, db, o, s) for n, (db, o, s) in self.entries.items())

        fmt = self.INDEXFORMAT
        parts = []
        for node, deltabase, offset, size in entries:
            if deltabase == nullid:
                deltabaselocation = FULLTEXTINDEXMARK
            else:
                # Instead of storing the deltabase node in the index, let's
                # store a pointer directly to the index entry for the deltabase.
                deltabaselocation = nodelocations.get(deltabase, NOBASEINDEXMARK)

            parts.append(struct.pack(fmt, node, deltabaselocation, offset, size))

        # Join once instead of the previous quadratic "rawindex += entry" loop.
        return "".join(parts)

    def get(self, name, node):
        raise RuntimeError("must use getdeltachain with mutabledatapack")

    def getmeta(self, name, node):
        """Return just the metadata dict for (name, node)."""
        delta, deltaname, deltabasenode, meta = self.getdelta(name, node)
        return meta

    def getdelta(self, name, node):
        """Read one entry back out of the partially-written pack file.

        Returns (delta, filename, deltabasenode, meta); raises KeyError if
        the node has not been added to this pack.
        """
        value = self.entries.get(node)
        if value is None:
            raise KeyError(name, hex(node))

        # Unpack the value we already fetched (previously this did a second,
        # redundant self.entries[node] lookup).
        deltabasenode, offset, size = value

        try:
            # Seek to data
            self.packfp.seek(offset, os.SEEK_SET)
            data = self.packfp.read(size)
        finally:
            # Seek back to the end
            self.packfp.seek(0, os.SEEK_END)

        entry = _readdataentry(data, self.VERSION, getmeta=True)
        filename, node, deltabasenode, delta, meta = entry
        return delta, filename, deltabasenode, meta

    def getdeltachain(self, name, node):
        """Walk deltabase links from node back toward a fulltext.

        Returns a list of (name, node, deltaname, deltabasenode, delta)
        tuples; a partial chain is returned if a link is missing, but a
        missing first entry raises KeyError.
        """
        deltachain = []
        while node != nullid:
            try:
                value = self.getdelta(name, node)
                delta, deltaname, deltabasenode, meta = value
                deltachain.append((name, node, deltaname, deltabasenode, delta))
                name = deltaname
                node = deltabasenode
            except KeyError:
                # If we don't even have the first entry, throw. Otherwise return
                # what we have
                if not deltachain:
                    raise
                break

        return deltachain

    def getmissing(self, keys):
        """Return the subset of (name, node) keys not present in this pack."""
        missing = []
        for name, node in keys:
            value = self.entries.get(node)
            if value is None:
                missing.append((name, node))

        return missing
class memdatapack(object):
|
||||
def __init__(self):
|
||||
self.data = {}
|
||||
|
Loading…
Reference in New Issue
Block a user