# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os
import struct

from edenscm.mercurial import error, util
from edenscm.mercurial.i18n import _
from edenscm.mercurial.node import hex, nullid

from . import basepack, constants, shallowutil
from ..extlib.pyrevisionstore import (
    datapack as rustdatapack,
    repackincrementaldatapacks,
)
from .lz4wrapper import lz4compress, lz4decompress
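
# Datapacks store revision contents (deltas) for file revisions; see the
# mutabledatapack docstring below for the on-disk format.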

# py2/py3 compatibility: provide xrange on Python 3.
try:
    xrange(0)
except NameError:
    xrange = range

try:
    from ..extlib import cstore

    # Probe the attribute so that a cstore build without datapack support
    # falls back to the pure Python implementation.
    cstore.datapack
except ImportError:
    cstore = None

NODELENGTH = 20

# The indicator value in the index for a fulltext entry.
FULLTEXTINDEXMARK = -1
# The indicator value for an entry whose delta base is not in this pack.
NOBASEINDEXMARK = -2

INDEXSUFFIX = ".dataidx"
PACKSUFFIX = ".datapack"


class datapackstore(basepack.basepackstore):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def __init__(
        self,
        ui,
        path,
        usecdatapack=False,
        deletecorruptpacks=False,
        userustdatapack=False,
    ):
        self.usecdatapack = usecdatapack
        self.userustdatapack = userustdatapack
        super(datapackstore, self).__init__(
            ui, path, deletecorruptpacks=deletecorruptpacks
        )

    def getpack(self, path):
        # Prefer the Rust implementation, then the C one, falling back to
        # the pure Python datapack.
        if self.userustdatapack:
            return rustdatapack(path)
        elif self.usecdatapack:
            return fastdatapack(path)
        else:
            return datapack(path)

    def get(self, name, node):
        raise RuntimeError("must use getdeltachain with datapackstore")

    def getmeta(self, name, node):
        def func(pack):
            return pack.getmeta(name, node)

        for meta in self.runonpacks(func):
            return meta

        raise KeyError((name, hex(node)))

    def getdelta(self, name, node):
        def func(pack):
            return pack.getdelta(name, node)

        for delta in self.runonpacks(func):
            return delta

        raise KeyError((name, hex(node)))

    def getdeltachain(self, name, node):
        def func(pack):
            return pack.getdeltachain(name, node)

        for deltachain in self.runonpacks(func):
            return deltachain

        raise KeyError((name, hex(node)))

    def add(self, name, node, data):
        raise RuntimeError("cannot add to datapackstore")

    def repackstore(self, incremental=True):
        if self.fetchpacksenabled:
            repackincrementaldatapacks(self.path, self.path)
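

# A minimal usage sketch of datapackstore (``ui``, ``filename`` and
# ``filenode`` are hypothetical values, for illustration only):
#
#   store = datapackstore(ui, "/path/to/packs", userustdatapack=True)
#   chain = store.getdeltachain(filename, filenode)  # KeyError if absent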


class datapack(basepack.basepack):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # Format is <node><delta offset><pack data offset><pack data size>
    # See the mutabledatapack doccomment for more details. "!20siQQ" is a
    # 20 byte node, a signed 4 byte delta base offset, and two unsigned
    # 8 byte ints (pack offset and size), 40 bytes in total.
    INDEXFORMAT = "!20siQQ"
    INDEXENTRYLENGTH = 40

    SUPPORTED_VERSIONS = [0, 1]

    def getmissing(self, keys):
        missing = []
        for name, node in keys:
            value = self._find(node)
            if not value:
                missing.append((name, node))

        return missing

    def get(self, name, node):
        raise RuntimeError(
            "must use getdeltachain with datapack (%s:%s)" % (name, hex(node))
        )

    def getmeta(self, name, node):
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        # version 0 does not support metadata
        if self.VERSION == 0:
            return {}

        node, deltabaseoffset, offset, size = value
        rawentry = self._data[offset : offset + size]

        # see docstring of mutabledatapack for the format
        offset = 0
        offset += struct.unpack_from("!H", rawentry, offset)[0] + 2  # filename
        offset += 40  # node, deltabase node
        offset += struct.unpack_from("!Q", rawentry, offset)[0] + 8  # delta

        metalen = struct.unpack_from("!I", rawentry, offset)[0]
        offset += 4

        meta = shallowutil.parsepackmeta(rawentry[offset : offset + metalen])

        return meta

    def getdelta(self, name, node):
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        node, deltabaseoffset, offset, size = value
        entry = self._readentry(offset, size, getmeta=True)
        filename, node, deltabasenode, delta, meta = entry

        # If we've read a lot of data from the mmap, free some memory.
        self.freememory()

        return delta, filename, deltabasenode, meta

    def getdeltachain(self, name, node):
        value = self._find(node)
        if value is None:
            raise KeyError((name, hex(node)))

        params = self.params

        # Precompute chains
        chain = [value]
        deltabaseoffset = value[1]
        entrylen = self.INDEXENTRYLENGTH
        while (
            deltabaseoffset != FULLTEXTINDEXMARK and deltabaseoffset != NOBASEINDEXMARK
        ):
            loc = params.indexstart + deltabaseoffset
            value = struct.unpack(self.INDEXFORMAT, self._index[loc : loc + entrylen])
            deltabaseoffset = value[1]
            chain.append(value)

        # Read chain data
        deltachain = []
        for node, deltabaseoffset, offset, size in chain:
            filename, node, deltabasenode, delta = self._readentry(offset, size)
            deltachain.append((filename, node, filename, deltabasenode, delta))

        # If we've read a lot of data from the mmap, free some memory.
        self.freememory()

        return deltachain

    def _readentry(self, offset, size, getmeta=False):
        rawentry = self._data[offset : offset + size]
        self._pagedin += len(rawentry)
        return _readdataentry(rawentry, self.VERSION, getmeta=getmeta)

    def add(self, name, node, data):
        raise RuntimeError("cannot add to datapack (%s:%s)" % (name, node))

    def _find(self, node):
        # Narrow the search range via the fanout table, then bisect the
        # index for the exact node.
        params = self.params
        fanoutkey = struct.unpack(params.fanoutstruct, node[: params.fanoutprefix])[0]
        fanout = self._fanouttable

        start = fanout[fanoutkey] + params.indexstart
        indexend = self._indexend

        # Scan forward to find the first non-same entry, which is the upper
        # bound.
        for i in xrange(fanoutkey + 1, params.fanoutcount):
            end = fanout[i] + params.indexstart
            if end != start:
                break
        else:
            end = indexend

        # Bisect between start and end to find node
        index = self._index
        startnode = index[start : start + NODELENGTH]
        endnode = index[end : end + NODELENGTH]
        entrylen = self.INDEXENTRYLENGTH
        if startnode == node:
            entry = index[start : start + entrylen]
        elif endnode == node:
            entry = index[end : end + entrylen]
        else:
            while start < end - entrylen:
                # Use floor division so the midpoint stays an integer on
                # Python 3 as well.
                mid = start + (end - start) // 2
                mid = mid - ((mid - params.indexstart) % entrylen)
                midnode = index[mid : mid + NODELENGTH]
                if midnode == node:
                    entry = index[mid : mid + entrylen]
                    break
                if node > midnode:
                    start = mid
                    startnode = midnode
                elif node < midnode:
                    end = mid
                    endnode = midnode
            else:
                return None

        return struct.unpack(self.INDEXFORMAT, entry)

    def markledger(self, ledger, options=None):
        if options and options.get(constants.OPTION_LOOSEONLY):
            return

        with ledger.location(self._path):
            for filename, node in self:
                ledger.markdataentry(self, filename, node)

    def cleanup(self, ledger):
        entries = ledger.sources.get(self, [])
        allkeys = set(self)
        repackedkeys = set(
            (e.filename, e.node) for e in entries if e.datarepacked or e.gced
        )

        if len(allkeys - repackedkeys) == 0:
            if self._path not in ledger.created:
                util.unlinkpath(self.indexpath(), ignoremissing=True)
                util.unlinkpath(self.packpath(), ignoremissing=True)

    def __iter__(self):
        for f, n, deltabase, deltalen in self.iterentries():
            yield f, n

    def iterentries(self, yieldall=False):
        """Yields (filename, node, deltabase, datalength) for each entry.

        If ``yieldall`` is True, yields
        (filename, node, deltabase, datalength, delta, meta).
        """
        # Start at 1 to skip the header
        offset = 1
        data = self._data
        delta = None
        meta = None
        while offset < self.datasize:
            oldoffset = offset

            # <2 byte len> + <filename>
            filenamelen = struct.unpack("!H", data[offset : offset + 2])[0]
            offset += 2
            filename = data[offset : offset + filenamelen]
            offset += filenamelen

            # <20 byte node>
            node = data[offset : offset + constants.NODESIZE]
            offset += constants.NODESIZE

            # <20 byte deltabase>
            deltabase = data[offset : offset + constants.NODESIZE]
            offset += constants.NODESIZE

            # <8 byte len> + <delta>
            rawdeltalen = data[offset : offset + 8]
            deltalen = struct.unpack("!Q", rawdeltalen)[0]
            offset += 8

            # it has to be at least long enough for the lz4 header.
            assert deltalen >= 4

            if yieldall:
                delta = lz4decompress(data[offset : offset + deltalen])

            # python-lz4 stores the length of the uncompressed field as a
            # little-endian 32-bit integer at the start of the data.
            uncompressedlen = struct.unpack("<I", data[offset : offset + 4])[0]
            offset += deltalen

            if self.VERSION == 1:
                # <4 byte len> + <metadata-list>
                metalen = struct.unpack("!I", data[offset : offset + 4])[0]
                offset += 4
                if yieldall:
                    meta = data[offset : offset + metalen]
                offset += metalen

            if yieldall:
                yield (filename, node, deltabase, uncompressedlen, delta, meta)
            else:
                yield (filename, node, deltabase, uncompressedlen)

            # If we've read a lot of data from the mmap, free some memory.
            self._pagedin += offset - oldoffset
            if self.freememory():
                data = self._data


def _readdataentry(rawentry, version, getmeta=False):
    # <2 byte len> + <filename>
    lengthsize = 2
    filenamelen = struct.unpack("!H", rawentry[:2])[0]
    filename = rawentry[lengthsize : lengthsize + filenamelen]

    # <20 byte node> + <20 byte deltabase>
    nodestart = lengthsize + filenamelen
    deltabasestart = nodestart + NODELENGTH
    node = rawentry[nodestart:deltabasestart]
    deltabasenode = rawentry[deltabasestart : deltabasestart + NODELENGTH]

    # <8 byte len> + <delta>
    deltastart = deltabasestart + NODELENGTH
    rawdeltalen = rawentry[deltastart : deltastart + 8]
    deltalen = struct.unpack("!Q", rawdeltalen)[0]

    delta = rawentry[deltastart + 8 : deltastart + 8 + deltalen]
    delta = lz4decompress(delta)

    if getmeta:
        if version == 0:
            meta = {}
        else:
            metastart = deltastart + 8 + deltalen
            metalen = struct.unpack_from("!I", rawentry, metastart)[0]

            rawmeta = rawentry[metastart + 4 : metastart + 4 + metalen]
            meta = shallowutil.parsepackmeta(rawmeta)
        return filename, node, deltabasenode, delta, meta
    else:
        return filename, node, deltabasenode, delta


class fastdatapack(basepack.basepack):
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def __init__(self, path):
        self._path = path
        self._packpath = path + self.PACKSUFFIX
        self._indexpath = path + self.INDEXSUFFIX
        self.datapack = cstore.datapack(path)

    def getmissing(self, keys):
        missing = []
        for name, node in keys:
            value = self.datapack._find(node)
            if not value:
                missing.append((name, node))

        return missing

    def get(self, name, node):
        raise RuntimeError(
            "must use getdeltachain with datapack (%s:%s)" % (name, hex(node))
        )

    def getmeta(self, name, node):
        return self.datapack.getmeta(node)

    def getdelta(self, name, node):
        result = self.datapack.getdelta(node)
        if result is None:
            raise KeyError((name, hex(node)))

        delta, deltabasenode, meta = result
        return delta, name, deltabasenode, meta

    def getdeltachain(self, name, node):
        result = self.datapack.getdeltachain(node)
        if result is None:
            raise KeyError((name, hex(node)))

        return result

    def add(self, name, node, data):
        raise RuntimeError("cannot add to datapack (%s:%s)" % (name, node))

    def markledger(self, ledger, options=None):
        if options and options.get(constants.OPTION_LOOSEONLY):
            return

        with ledger.location(self._path):
            for filename, node in self:
                ledger.markdataentry(self, filename, node)

    def cleanup(self, ledger):
        entries = ledger.sources.get(self, [])
        allkeys = set(self)
        repackedkeys = set(
            (e.filename, e.node) for e in entries if e.datarepacked or e.gced
        )

        if len(allkeys - repackedkeys) == 0:
            if self._path not in ledger.created:
                util.unlinkpath(self.indexpath(), ignoremissing=True)
                util.unlinkpath(self.packpath(), ignoremissing=True)

    def __iter__(self):
        return self.datapack.__iter__()

    def iterentries(self):
        return self.datapack.iterentries()


class mutabledatapack(basepack.mutablebasepack):
    """A class for constructing and serializing a datapack file and index.

    A datapack is a pair of files that contain the revision contents for various
    file revisions in Mercurial. It contains only revision contents (like file
    contents), not any history information.

    It consists of two files, with the following format. All bytes are in
    network byte order (big endian).

    .datapack
        The pack itself is a series of revision deltas with some basic header
        information on each. A revision delta may be a fulltext, represented by
        a deltabasenode equal to the nullid.

        datapack = <version: 1 byte>
                   [<revision>,...]
        revision = <filename len: 2 byte unsigned int>
                   <filename>
                   <node: 20 byte>
                   <deltabasenode: 20 byte>
                   <delta len: 8 byte unsigned int>
                   <delta>
                   <metadata-list len: 4 byte unsigned int> [1]
                   <metadata-list> [1]
        metadata-list = [<metadata-item>, ...]
        metadata-item = <metadata-key: 1 byte>
                        <metadata-value len: 2 byte unsigned>
                        <metadata-value>

        metadata-key could be METAKEYFLAG or METAKEYSIZE or other single byte
        value in the future.

    .dataidx
        The index file consists of two parts, the fanout and the index.

        The index is a list of index entries, sorted by node (one per revision
        in the pack). Each entry has:

        - node (The 20 byte node of the entry; i.e. the commit hash, file node
                hash, etc)
        - deltabase index offset (The location in the index of the deltabase for
                                  this entry. The deltabase is the next delta in
                                  the chain, with the chain eventually
                                  terminating in a full-text, represented by a
                                  deltabase offset of -1. This lets us compute
                                  delta chains from the index, then do
                                  sequential reads from the pack if the
                                  revisions are nearby on disk.)
        - pack entry offset (The location of this entry in the datapack)
        - pack content size (The on-disk length of this entry's pack data)

        The fanout is a quick lookup table to reduce the number of steps for
        bisecting the index. It is a series of 4 byte pointers to positions
        within the index. It has 2^16 entries, which corresponds to hash
        prefixes [0000, 0001,..., FFFE, FFFF]. Example: the pointer in slot
        4F0A points to the index position of the first revision whose node
        starts with 4F0A. This saves log(2^16)=16 bisect steps.

        dataidx = <fanouttable>
                  <index>
        fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
        index = [<index entry>,...]
        indexentry = <node: 20 byte>
                     <deltabase location: 4 byte signed int>
                     <pack entry offset: 8 byte unsigned int>
                     <pack entry size: 8 byte unsigned int>

    [1]: new in version 1.
    """
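
    # A minimal decode sketch for a single index entry in the layout above
    # (``rawindex`` and ``pos`` are hypothetical names, for illustration):
    #
    #   node, deltabaseloc, offset, size = struct.unpack(
    #       "!20siQQ", rawindex[pos : pos + 40]
    #   )
    #
    # deltabaseloc is FULLTEXTINDEXMARK (-1) for a fulltext, NOBASEINDEXMARK
    # (-2) when the base is not in this pack, and otherwise an offset into
    # the index.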

    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    # v[01] index format: <node><delta offset><pack data offset><pack data size>
    INDEXFORMAT = datapack.INDEXFORMAT
    INDEXENTRYLENGTH = datapack.INDEXENTRYLENGTH

    # v1 has metadata support
    SUPPORTED_VERSIONS = [0, 1]

    def add(self, name, node, deltabasenode, delta, metadata=None):
        # metadata is a dict, ex. {METAKEYFLAG: flag}
        # The filename length is stored in a 2 byte unsigned field, so names
        # of 2**16 bytes or more cannot be represented.
        if len(name) >= 2 ** 16:
            raise RuntimeError(_("name too long %s") % name)
        if len(node) != 20:
            raise RuntimeError(_("node should be 20 bytes %s") % node)

        if node in self.entries:
            # The revision has already been added
            return

        # TODO: allow configurable compression
        delta = lz4compress(delta)

        rawdata = "%s%s%s%s%s%s" % (
            struct.pack("!H", len(name)),  # unsigned 2 byte int
            name,
            node,
            deltabasenode,
            struct.pack("!Q", len(delta)),  # unsigned 8 byte int
            delta,
        )

        if self.VERSION == 1:
            # v1 supports metadata
            rawmeta = shallowutil.buildpackmeta(metadata)
            rawdata += struct.pack("!I", len(rawmeta))  # unsigned 4 byte
            rawdata += rawmeta
        else:
            # v0 cannot store metadata, raise if metadata contains flag
            if metadata and metadata.get(constants.METAKEYFLAG, 0) != 0:
                raise error.ProgrammingError("v0 pack cannot store flags")

        offset = self.packfp.tell()

        size = len(rawdata)

        self.entries[node] = (deltabasenode, offset, size)

        self.writeraw(rawdata)

    def createindex(self, nodelocations, indexoffset):
        entries = sorted((n, db, o, s) for n, (db, o, s) in self.entries.iteritems())

        rawindex = ""
        fmt = self.INDEXFORMAT
        for node, deltabase, offset, size in entries:
            if deltabase == nullid:
                deltabaselocation = FULLTEXTINDEXMARK
            else:
                # Instead of storing the deltabase node in the index, let's
                # store a pointer directly to the index entry for the deltabase.
                deltabaselocation = nodelocations.get(deltabase, NOBASEINDEXMARK)

            entry = struct.pack(fmt, node, deltabaselocation, offset, size)
            rawindex += entry

        return rawindex

    def get(self, name, node):
        raise RuntimeError("must use getdeltachain with mutabledatapack")

    def getmeta(self, name, node):
        delta, deltaname, deltabasenode, meta = self.getdelta(name, node)
        return meta

    def getdelta(self, name, node):
        value = self.entries.get(node)
        if value is None:
            raise KeyError((name, hex(node)))

        deltabasenode, offset, size = self.entries[node]

        try:
            # Seek to data
            self.packfp.seek(offset, os.SEEK_SET)
            data = self.packfp.read(size)
        finally:
            # Seek back to the end
            self.packfp.seek(0, os.SEEK_END)

        entry = _readdataentry(data, self.VERSION, getmeta=True)
        filename, node, deltabasenode, delta, meta = entry
        return delta, filename, deltabasenode, meta

    def getdeltachain(self, name, node):
        deltachain = []
        while node != nullid:
            try:
                value = self.getdelta(name, node)
                delta, deltaname, deltabasenode, meta = value
                deltachain.append((name, node, deltaname, deltabasenode, delta))
                name = deltaname
                node = deltabasenode
            except KeyError:
                # If we don't even have the first entry, throw. Otherwise return
                # what we have
                if not deltachain:
                    raise
                break

        return deltachain

    def getmissing(self, keys):
        missing = []
        for name, node in keys:
            value = self.entries.get(node)
            if value is None:
                missing.append((name, node))

        return missing
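

# memdatapack keeps deltas in plain dicts. Note that add() only fills
# self.data; callers (e.g. tests) are expected to populate self.meta
# themselves before getdelta()/getmeta() is used.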
class memdatapack(object):
    def __init__(self):
        self.data = {}
        self.meta = {}

    def add(self, name, node, deltabase, delta):
        self.data[(name, node)] = (deltabase, delta)

    def getdelta(self, name, node):
        deltabase, delta = self.data[(name, node)]
        return (delta, name, deltabase, self.getmeta(name, node))

    def getdeltachain(self, name, node):
        deltabase, delta = self.data[(name, node)]
        return [(name, node, name, deltabase, delta)]

    def getmeta(self, name, node):
        return self.meta[(name, node)]

    def getmissing(self, keys):
        missing = []
        for key in keys:
            if key not in self.data:
                missing.append(key)
        return missing