mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 01:07:15 +03:00
d3c6def7b8
Summary: Treemanifest had a bug where the pack files it created were 400 instead of 444. This meant people sharing a cache on the same machine couldn't access them. In most of the remotefilelog code we had set opener.createmode before creating the pack but we forgot to for treemanifest. This patch moves the opener creation and createmode setting into the mutable pack class so we can't screw this up again. Test Plan: Tests that wrote to the packs before, now failed and had to be updated to chmod the packs before writing to them. Reviewers: #mercurial, rmcelroy Reviewed By: rmcelroy Subscribers: rmcelroy, mjpieters Differential Revision: https://phabricator.intern.facebook.com/D4411580 Tasks: 15469140 Signature: t1:4411580:1484321293:9aa78254677548a6dc2270c58cee0ec6f57dd089
375 lines
13 KiB
Python
375 lines
13 KiB
Python
import hashlib, struct
|
|
from mercurial import util
|
|
from mercurial.node import hex
|
|
import basepack, constants
|
|
|
|
# Layout of one .histidx entry: (filename hash, offset, size)
INDEXFORMAT = '!20sQQ'
# Derived from the format so the two cannot drift apart (36 bytes).
INDEXENTRYLENGTH = struct.calcsize(INDEXFORMAT)
NODELENGTH = 20

# Fixed-size part of one .histpack revision:
# (node, p1, p2, linknode, copyfrom length)
PACKFORMAT = "!20s20s20s20sH"
# Derived from the format so the two cannot drift apart (82 bytes).
PACKENTRYLENGTH = struct.calcsize(PACKFORMAT)

# Width in bytes of the '!I' field that follows each filename in the pack.
OFFSETSIZE = 4

INDEXSUFFIX = '.histidx'
PACKSUFFIX = '.histpack'

# Positions of the fields inside a tuple unpacked with PACKFORMAT.
(ANC_NODE,
 ANC_P1NODE,
 ANC_P2NODE,
 ANC_LINKNODE,
 ANC_COPYFROM) = range(5)
|
|
|
|
class historypackstore(basepack.basepackstore):
    """Store that answers history lookups from a set of history packs.

    The store is read-only: ``add`` always raises.
    """
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def getpack(self, path):
        """Return a ``historypack`` for the pack located at ``path``."""
        return historypack(path)

    def getancestors(self, name, node):
        """Return the ancestor map for ``(name, node)``.

        The packs in ``self.packs`` are tried first. Only when every one of
        them raises KeyError is ``self.refresh()`` called and the packs it
        yields tried in turn. The first successful answer is returned.

        Raises KeyError((name, node)) when no pack has the entry.
        """
        # Each source is a callable so that refresh() is not invoked until
        # the already-loaded packs have all missed.
        for listpacks in (lambda: self.packs, self.refresh):
            for candidate in listpacks():
                try:
                    return candidate.getancestors(name, node)
                except KeyError:
                    continue

        raise KeyError((name, node))

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        """Always raise RuntimeError; this store cannot be written to."""
        details = (filename, hex(node))
        raise RuntimeError("cannot add to historypackstore (%s:%s)" % details)
|
|
|
|
class historypack(basepack.basepack):
    """Read-only accessor for one history pack (pack file plus index file).

    Lookups first locate the file section for a filename through the index
    (``_findsection``) and then scan that section's revision entries.
    """
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX

    def getmissing(self, keys):
        """Return the ``(name, node)`` pairs from ``keys`` absent from the pack.

        A key counts as missing when either the file section or the node
        lookup raises KeyError.
        """
        missing = []
        for name, node in keys:
            try:
                section = self._findsection(name)
                self._findnode(section, node)
            except KeyError:
                missing.append((name, node))

        return missing

    def getancestors(self, name, node):
        """Returns as many ancestors as we're aware of.

        return value: {
           node: (p1, p2, linknode, copyfrom),
           ...
        }

        Raises KeyError if the file has no section in this pack, or if the
        section contains neither ``node`` nor any of its ancestors.
        """
        filename, offset, size = self._findsection(name)
        ancestors = set((node,))
        data = self._data[offset:offset + size]
        results = {}
        o = 0
        # Single forward pass: an entry is kept when its node is already
        # wanted, and its parents then become wanted too. This relies on
        # children being stored before their parents within a section.
        while o < len(data):
            entry = struct.unpack(PACKFORMAT, data[o:o + PACKENTRYLENGTH])
            o += PACKENTRYLENGTH
            copyfrom = None
            copyfromlen = entry[ANC_COPYFROM]
            if copyfromlen != 0:
                copyfrom = data[o:o + copyfromlen]
                o += copyfromlen

            if entry[ANC_NODE] in ancestors:
                ancestors.add(entry[ANC_P1NODE])
                ancestors.add(entry[ANC_P2NODE])
                result = (entry[ANC_P1NODE],
                          entry[ANC_P2NODE],
                          entry[ANC_LINKNODE],
                          copyfrom)
                results[entry[ANC_NODE]] = result

            # Only the fixed-size part of the entry is accounted for here.
            self._pagedin += PACKENTRYLENGTH

        # If we've read a lot of data from the mmap, free some memory.
        self.freememory()

        if not results:
            raise KeyError((name, node))
        return results

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        """Always raise RuntimeError; an existing pack cannot be appended to."""
        raise RuntimeError("cannot add to historypack (%s:%s)" %
                           (filename, hex(node)))

    def _findnode(self, section, node):
        """Scan ``section`` for ``node`` and return its history tuple.

        ``section`` is the ``(name, offset, size)`` tuple produced by
        ``_findsection``. Returns ``(p1, p2, linknode, copyfrom)`` where
        ``copyfrom`` is None when the entry records no copy source.

        Raises KeyError when the node is not present in the section.
        """
        name, offset, size = section
        data = self._data
        o = offset
        while o < offset + size:
            entry = struct.unpack(PACKFORMAT,
                                  data[o:o + PACKENTRYLENGTH])
            o += PACKENTRYLENGTH

            if entry[ANC_NODE] == node:
                copyfrom = None
                copyfromlen = entry[ANC_COPYFROM]
                if copyfromlen != 0:
                    copyfrom = data[o:o + copyfromlen]

                # If we've read a lot of data from the mmap, free some memory.
                self._pagedin += o - offset
                self.freememory()
                return (entry[ANC_P1NODE],
                        entry[ANC_P2NODE],
                        entry[ANC_LINKNODE],
                        copyfrom)

            # Not the entry we want: step over its variable-length copyfrom.
            o += entry[ANC_COPYFROM]

        raise KeyError("unable to find history for %s:%s" % (name, hex(node)))

    def _findsection(self, name):
        """Locate the revision entries for file ``name`` in the pack.

        Returns ``(name, offset, size)`` where ``offset`` is the position in
        the pack data of the first revision entry for the file and ``size``
        is the number of bytes of revision entries that follow.

        Raises KeyError when the filename hash is not in the index, or when
        the section found stores a different filename than ``name``.
        """
        params = self.params
        namehash = hashlib.sha1(name).digest()
        fanoutkey = struct.unpack(params.fanoutstruct,
                                  namehash[:params.fanoutprefix])[0]
        fanout = self._fanouttable

        # The fanout slot gives the lower bound. The upper bound is the
        # first later slot that points somewhere else, or the end of the
        # index if there is none.
        start = fanout[fanoutkey] + params.indexstart
        for i in xrange(fanoutkey + 1, params.fanoutcount):
            end = fanout[i] + params.indexstart
            if end != start:
                break
        else:
            end = self.indexsize

        # Bisect between start and end to find node
        startnode = self._index[start:start + NODELENGTH]
        endnode = self._index[end:end + NODELENGTH]
        if startnode == namehash:
            entry = self._index[start:start + INDEXENTRYLENGTH]
        elif endnode == namehash:
            entry = self._index[end:end + INDEXENTRYLENGTH]
        else:
            while start < end - INDEXENTRYLENGTH:
                # Floor division keeps the offset an integer under both
                # classic and true division semantics.
                mid = start + (end - start) // 2
                # Snap back to the beginning of an index entry.
                mid = mid - ((mid - params.indexstart) % INDEXENTRYLENGTH)
                midnode = self._index[mid:mid + NODELENGTH]
                if midnode == namehash:
                    entry = self._index[mid:mid + INDEXENTRYLENGTH]
                    break
                if namehash > midnode:
                    start = mid
                else:
                    end = mid
            else:
                # The range collapsed without hitting the hash.
                raise KeyError(name)

        filenamehash, offset, size = struct.unpack(INDEXFORMAT, entry)
        filenamelength = struct.unpack('!H', self._data[offset:offset +
                                                    constants.FILENAMESIZE])[0]
        offset += constants.FILENAMESIZE

        actualname = self._data[offset:offset + filenamelength]
        offset += filenamelength

        # The index is keyed by hash only, so confirm the stored name.
        if name != actualname:
            raise KeyError("found file name %s when looking for %s" %
                           (actualname, name))

        # Skip the OFFSETSIZE-byte field after the filename; its value is
        # not needed because the section is delimited by ``size``.
        offset += OFFSETSIZE

        # ``size`` from the index is reduced by the header bytes stepped
        # over above, leaving only the revision entries.
        return (name, offset, size - constants.FILENAMESIZE - filenamelength
                              - OFFSETSIZE)

    def markledger(self, ledger):
        """Register every ``(filename, node)`` of this pack with ``ledger``."""
        for filename, node in self:
            ledger.markhistoryentry(self, filename, node)

    def cleanup(self, ledger):
        """Delete this pack's files once all of its entries were repacked.

        Nothing is removed if any key of this pack lacks a ledger entry
        flagged ``historyrepacked``, or if this pack's path is listed in
        ``ledger.created``.
        """
        entries = ledger.sources.get(self, [])
        allkeys = set(self)
        repackedkeys = set((e.filename, e.node) for e in entries if
                           e.historyrepacked)

        if allkeys <= repackedkeys and self.path not in ledger.created:
            util.unlinkpath(self.indexpath, ignoremissing=True)
            util.unlinkpath(self.packpath, ignoremissing=True)

    def __iter__(self):
        """Yield ``(filename, node)`` for every revision in the pack."""
        for f, n, x, x, x, x in self.iterentries():
            yield f, n

    def iterentries(self):
        """Yield ``(filename, node, p1, p2, linknode, copyfrom)`` for every
        revision in the pack, in on-disk order.

        Unlike ``getancestors``, ``copyfrom`` here is the raw slice, so it is
        an empty string rather than None when no copy source was recorded.
        """
        # Start at 1 to skip the header
        offset = 1
        while offset < self.datasize:
            # Re-read for every section because freememory() runs between
            # sections. NOTE(review): presumably it can replace self._data;
            # confirm in basepack.
            data = self._data
            # <2 byte len> + <filename>
            filenamelen = struct.unpack('!H', data[offset:offset +
                                                   constants.FILENAMESIZE])[0]
            assert (filenamelen > 0)
            offset += constants.FILENAMESIZE
            filename = data[offset:offset + filenamelen]
            offset += filenamelen

            revcount = struct.unpack('!I', data[offset:offset +
                                                OFFSETSIZE])[0]
            offset += OFFSETSIZE

            for _ in xrange(revcount):
                entry = struct.unpack(PACKFORMAT, data[offset:offset +
                                                       PACKENTRYLENGTH])
                offset += PACKENTRYLENGTH

                copyfrom = data[offset:offset + entry[ANC_COPYFROM]]
                offset += entry[ANC_COPYFROM]

                yield (filename, entry[ANC_NODE], entry[ANC_P1NODE],
                       entry[ANC_P2NODE], entry[ANC_LINKNODE], copyfrom)

                self._pagedin += PACKENTRYLENGTH

            # If we've read a lot of data from the mmap, free some memory.
            self.freememory()
|
|
|
|
class mutablehistorypack(basepack.mutablebasepack):
    """A class for constructing and serializing a histpack file and index.

    A history pack is a pair of files that contain the revision history for
    various file revisions in Mercurial. It contains only revision history (like
    parent pointers and linknodes), not any revision content information.

    It consists of two files, with the following format:

    .histpack
        The pack itself is a series of file sections, each holding the
        revisions of one file with some basic header information.

        histpack = <version: 1 byte>
                   [<filesection>,...]
        filesection = <filename len: 2 byte unsigned int>
                      <filename>
                      <revision count: 4 byte unsigned int>
                      [<revision>,...]
        revision = <node: 20 byte>
                   <p1node: 20 byte>
                   <p2node: 20 byte>
                   <linknode: 20 byte>
                   <copyfromlen: 2 byte>
                   <copyfrom>

        The revisions within each filesection are stored in topological order
        (newest first). If a given entry has a parent from another file (a copy)
        then p1node is the node from the other file, and copyfrom is the
        filepath of the other file.

    .histidx
        The index file provides a mapping from filename to the file section in
        the histpack. It consists of two parts, the fanout and the index.

        The index is a list of index entries, sorted by filename hash (one per
        file section in the pack). Each entry has:

        - node (The 20 byte hash of the filename)
        - pack entry offset (The location of this file section in the histpack)
        - pack content size (The on-disk length of this file section's pack
                             data)

        The fanout is a quick lookup table to reduce the number of steps for
        bisecting the index. It is a series of 4 byte pointers to positions
        within the index. With a two byte hash prefix it has 2^16 entries,
        which correspond to hash prefixes [0000, 0001,..., FFFE, FFFF].
        Example: the pointer in slot 4F0A points to the index position of the
        first entry whose filename hash starts with 4F0A. This saves log(2^16)
        bisect steps. NOTE(review): the fanout itself is written by the base
        class, which is not visible here; confirm the prefix width there.

        histidx = <fanouttable>
                  <index>
        fanouttable = [<index offset: 4 byte unsigned int>,...] (2^16 entries)
        index = [<index entry>,...]
        indexentry = <node: 20 byte>
                     <pack file section offset: 8 byte unsigned int>
                     <pack file section size: 8 byte unsigned int>
    """
    INDEXSUFFIX = INDEXSUFFIX
    PACKSUFFIX = PACKSUFFIX
    INDEXENTRYLENGTH = INDEXENTRYLENGTH

    def __init__(self, ui, packpath):
        """Set up an empty pack; no file section is in progress yet."""
        super(mutablehistorypack, self).__init__(ui, packpath)
        # filename -> (section start offset, section length) for every
        # section already written to the pack file.
        self.pastfiles = {}
        # Name of the file whose revisions are currently being collected.
        self.currentfile = None
        # Revisions buffered for ``currentfile`` and not yet written.
        self.currententries = []

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        """Buffer one revision of ``filename`` for writing.

        All revisions of a file must be added consecutively. Switching to a
        new filename writes out the section being collected, and returning to
        a filename whose section was already written raises RuntimeError.
        A falsy ``copyfrom`` is stored as the empty string.
        """
        if filename != self.currentfile:
            if filename in self.pastfiles:
                raise RuntimeError("cannot add file node after another file's "
                                   "nodes have been added")
            if self.currentfile:
                self._writependingsection()

            self.currentfile = filename
            self.currententries = []

        copyfrom = copyfrom or ''
        copyfromlen = struct.pack('!H', len(copyfrom))
        self.currententries.append((node, p1, p2, linknode, copyfromlen,
                                    copyfrom))

    def _writependingsection(self):
        """Write the buffered revisions of ``currentfile`` as one section and
        record where the section lives for ``createindex``."""
        filename = self.currentfile
        sectionstart = self.packfp.tell()

        # Write the file section header
        self.writeraw("%s%s%s" % (
            struct.pack('!H', len(filename)),
            filename,
            struct.pack('!I', len(self.currententries)),
        ))
        # OFFSETSIZE is the width of the '!I' revision count written above.
        sectionlen = constants.FILENAMESIZE + len(filename) + OFFSETSIZE

        # Write the file section content
        rawdata = ''.join('%s%s%s%s%s%s' % e for e in self.currententries)
        sectionlen += len(rawdata)

        self.writeraw(rawdata)

        self.pastfiles[filename] = (sectionstart, sectionlen)
        node = hashlib.sha1(filename).digest()
        self.entries[node] = node

    def close(self, ledger=None):
        """Write any section still being collected, then close via the base
        class. Does nothing if the pack is already closed."""
        if self._closed:
            return

        if self.currentfile:
            self._writependingsection()

        return super(mutablehistorypack, self).close(ledger=ledger)

    def createindex(self, nodelocations):
        """Return the serialized index: one INDEXFORMAT record per written
        file section, sorted by filename hash.

        ``nodelocations`` is accepted for interface compatibility and is not
        used; the index is built from ``self.pastfiles``.
        """
        files = sorted((hashlib.sha1(filename).digest(), offset, size)
                       for filename, (offset, size)
                       in self.pastfiles.iteritems())

        # Join once instead of growing the string entry by entry.
        return "".join(struct.pack(INDEXFORMAT, namehash, offset, size)
                       for namehash, offset, size in files)
|