mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 15:27:13 +03:00
Implement data inlined with the index file
This patch allows you to optionally inline data bytes with the revlog index file. It saves considerable space and checkout time by reducing the number of inodes, wasted partial blocks and system calls. To use the inline data add this to your .hgrc [revlog] # inline data only works with revlogng format=1 # inline is the only valid flag right now. flags=inline
This commit is contained in:
parent
ed26ff0cae
commit
e7ddbb442b
@ -43,12 +43,23 @@ class localrepository(object):
|
||||
|
||||
v = self.ui.revlogopts
|
||||
self.revlogversion = int(v.get('format', 0))
|
||||
flags = 0
|
||||
for x in v.get('flags', "").split():
|
||||
self.revlogversion |= revlog.flagstr(x)
|
||||
flags |= revlog.flagstr(x)
|
||||
|
||||
v = self.revlogversion | flags
|
||||
self.manifest = manifest.manifest(self.opener, v)
|
||||
self.changelog = changelog.changelog(self.opener, v)
|
||||
|
||||
# the changelog might not have the inline index flag
|
||||
# on. If the format of the changelog is the same as found in
|
||||
# .hgrc, apply any flags found in the .hgrc as well.
|
||||
# Otherwise, just version from the changelog
|
||||
v = self.changelog.version
|
||||
if v == self.revlogversion:
|
||||
v |= flags
|
||||
self.revlogversion = v
|
||||
|
||||
self.manifest = manifest.manifest(self.opener, self.revlogversion)
|
||||
self.changelog = changelog.changelog(self.opener, self.revlogversion)
|
||||
self.revlogversion = self.changelog.version
|
||||
self.tagscache = None
|
||||
self.nodetagscache = None
|
||||
self.encodepats = None
|
||||
|
@ -20,6 +20,14 @@ demandload(globals(), "sha struct zlib")
|
||||
REVLOGV0 = 0
|
||||
REVLOGNG = 1
|
||||
|
||||
# revlog flags
|
||||
REVLOGNGINLINEDATA = (1 << 16)
|
||||
|
||||
def flagstr(flag):
|
||||
if flag == "inline":
|
||||
return REVLOGNGINLINEDATA
|
||||
raise RevlogError(_("unknown revlog flag %s" % flag))
|
||||
|
||||
def hash(text, p1, p2):
|
||||
"""generate a hash from the given text and its parent hashes
|
||||
|
||||
@ -234,13 +242,19 @@ class revlog(object):
|
||||
self.indexstat = st
|
||||
if len(i) > 0:
|
||||
v = struct.unpack(versionformat, i[:4])[0]
|
||||
if v != 0:
|
||||
flags = v & ~0xFFFF
|
||||
fmt = v & 0xFFFF
|
||||
if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)):
|
||||
raise RevlogError(
|
||||
_("unknown version format %d or flags %x on %s") %
|
||||
(v, flags, self.indexfile))
|
||||
flags = v & ~0xFFFF
|
||||
fmt = v & 0xFFFF
|
||||
if fmt == 0:
|
||||
if flags:
|
||||
raise RevlogError(_("index %s invalid flags %x for format v0" %
|
||||
(self.indexfile, flags)))
|
||||
elif fmt == REVLOGNG:
|
||||
if flags & ~REVLOGNGINLINEDATA:
|
||||
raise RevlogError(_("index %s invalid flags %x for revlogng" %
|
||||
(self.indexfile, flags)))
|
||||
else:
|
||||
raise RevlogError(_("index %s invalid format %d" %
|
||||
(self.indexfile, fmt)))
|
||||
self.version = v
|
||||
if v == 0:
|
||||
self.indexformat = indexformatv0
|
||||
@ -248,13 +262,17 @@ class revlog(object):
|
||||
self.indexformat = indexformatng
|
||||
|
||||
if i:
|
||||
if st and st.st_size > 10000:
|
||||
if not self.inlinedata() and st and st.st_size > 10000:
|
||||
# big index, let's parse it on demand
|
||||
parser = lazyparser(i, self, self.indexformat)
|
||||
self.index = lazyindex(parser)
|
||||
self.nodemap = lazymap(parser)
|
||||
else:
|
||||
self.parseindex(i)
|
||||
if self.inlinedata():
|
||||
# we've already got the entire data file read in, save it
|
||||
# in the chunk data
|
||||
self.chunkcache = (0, i)
|
||||
if self.version != 0:
|
||||
e = list(self.index[0])
|
||||
type = self.ngtype(e[0])
|
||||
@ -270,6 +288,7 @@ class revlog(object):
|
||||
l = len(data)
|
||||
self.index = []
|
||||
self.nodemap = {nullid: -1}
|
||||
inline = self.inlinedata()
|
||||
off = 0
|
||||
n = 0
|
||||
while off < l:
|
||||
@ -278,6 +297,8 @@ class revlog(object):
|
||||
self.nodemap[e[-1]] = n
|
||||
n += 1
|
||||
off += s
|
||||
if inline:
|
||||
off += e[1]
|
||||
|
||||
def ngoffset(self, q):
|
||||
if q & 0xFFFF:
|
||||
@ -297,6 +318,7 @@ class revlog(object):
|
||||
p = self.index.p
|
||||
p.load()
|
||||
|
||||
def inlinedata(self): return self.version & REVLOGNGINLINEDATA
|
||||
def tip(self): return self.node(len(self.index) - 1)
|
||||
def count(self): return len(self.index)
|
||||
def node(self, rev):
|
||||
@ -568,11 +590,17 @@ class revlog(object):
|
||||
|
||||
def chunk(self, rev, df=None, cachelen=4096):
|
||||
start, length = self.start(rev), self.length(rev)
|
||||
inline = self.inlinedata()
|
||||
if inline:
|
||||
start += (rev + 1) * struct.calcsize(self.indexformat)
|
||||
end = start + length
|
||||
def loadcache(df):
|
||||
cache_length = max(cachelen, length) # 4k
|
||||
if not df:
|
||||
df = self.opener(self.datafile)
|
||||
if inline:
|
||||
df = self.opener(self.indexfile)
|
||||
else:
|
||||
df = self.opener(self.datafile)
|
||||
df.seek(start)
|
||||
self.chunkcache = (start, df.read(cache_length))
|
||||
|
||||
@ -620,7 +648,11 @@ class revlog(object):
|
||||
rev = self.rev(node)
|
||||
base = self.base(rev)
|
||||
|
||||
df = self.opener(self.datafile)
|
||||
if self.inlinedata():
|
||||
# we probably have the whole chunk cached
|
||||
df = None
|
||||
else:
|
||||
df = self.opener(self.datafile)
|
||||
|
||||
# do we have useful data cached?
|
||||
if self.cache and self.cache[1] >= base and self.cache[1] < rev:
|
||||
@ -643,6 +675,40 @@ class revlog(object):
|
||||
self.cache = (node, rev, text)
|
||||
return text
|
||||
|
||||
def checkinlinesize(self, fp, tr):
|
||||
if not self.inlinedata():
|
||||
return
|
||||
size = fp.tell()
|
||||
if size < 131072:
|
||||
return
|
||||
tr.add(self.datafile, 0)
|
||||
df = self.opener(self.datafile, 'w')
|
||||
calc = struct.calcsize(self.indexformat)
|
||||
for r in xrange(self.count()):
|
||||
start = self.start(r) + (r + 1) * calc
|
||||
length = self.length(r)
|
||||
fp.seek(start)
|
||||
d = fp.read(length)
|
||||
df.write(d)
|
||||
fp.close()
|
||||
df.close()
|
||||
fp = self.opener(self.indexfile, 'w', atomic=True)
|
||||
self.version &= ~(REVLOGNGINLINEDATA)
|
||||
if self.count():
|
||||
x = self.index[0]
|
||||
e = struct.pack(self.indexformat, *x)[4:]
|
||||
l = struct.pack(versionformat, self.version)
|
||||
fp.write(l)
|
||||
fp.write(e)
|
||||
|
||||
for i in xrange(1, self.count()):
|
||||
x = self.index[i]
|
||||
e = struct.pack(self.indexformat, *x)
|
||||
fp.write(e)
|
||||
|
||||
fp.close()
|
||||
self.chunkcache = None
|
||||
|
||||
def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
|
||||
"""add a revision to the log
|
||||
|
||||
@ -698,13 +764,17 @@ class revlog(object):
|
||||
self.nodemap[node] = n
|
||||
entry = struct.pack(self.indexformat, *e)
|
||||
|
||||
transaction.add(self.datafile, offset)
|
||||
transaction.add(self.indexfile, n * len(entry))
|
||||
f = self.opener(self.datafile, "a")
|
||||
if data[0]:
|
||||
f.write(data[0])
|
||||
f.write(data[1])
|
||||
f = self.opener(self.indexfile, "a")
|
||||
if not self.inlinedata():
|
||||
transaction.add(self.datafile, offset)
|
||||
transaction.add(self.indexfile, n * len(entry))
|
||||
f = self.opener(self.datafile, "a")
|
||||
if data[0]:
|
||||
f.write(data[0])
|
||||
f.write(data[1])
|
||||
f = self.opener(self.indexfile, "a")
|
||||
else:
|
||||
f = self.opener(self.indexfile, "a+")
|
||||
transaction.add(self.indexfile, f.tell())
|
||||
|
||||
if len(self.index) == 1 and self.version != 0:
|
||||
l = struct.pack(versionformat, self.version)
|
||||
@ -713,6 +783,11 @@ class revlog(object):
|
||||
|
||||
f.write(entry)
|
||||
|
||||
if self.inlinedata():
|
||||
f.write(data[0])
|
||||
f.write(data[1])
|
||||
self.checkinlinesize(f, transaction)
|
||||
|
||||
self.cache = (node, n, text)
|
||||
return node
|
||||
|
||||
@ -830,8 +905,11 @@ class revlog(object):
|
||||
|
||||
ifh = self.opener(self.indexfile, "a+")
|
||||
transaction.add(self.indexfile, ifh.tell())
|
||||
transaction.add(self.datafile, end)
|
||||
dfh = self.opener(self.datafile, "a")
|
||||
if self.inlinedata():
|
||||
dfh = None
|
||||
else:
|
||||
transaction.add(self.datafile, end)
|
||||
dfh = self.opener(self.datafile, "a")
|
||||
|
||||
# loop through our set of deltas
|
||||
chain = None
|
||||
@ -885,8 +963,21 @@ class revlog(object):
|
||||
link, self.rev(p1), self.rev(p2), node)
|
||||
self.index.append(e)
|
||||
self.nodemap[node] = r
|
||||
dfh.write(cdelta)
|
||||
ifh.write(struct.pack(self.indexformat, *e))
|
||||
if self.inlinedata():
|
||||
ifh.write(struct.pack(self.indexformat, *e))
|
||||
ifh.write(cdelta)
|
||||
self.checkinlinesize(ifh, transaction)
|
||||
if not self.inlinedata():
|
||||
dfh = self.opener(self.datafile, "a")
|
||||
ifh = self.opener(self.indexfile, "a")
|
||||
else:
|
||||
if not dfh:
|
||||
# addrevision switched from inline to conventional
|
||||
# reopen the index
|
||||
dfh = self.opener(self.datafile, "a")
|
||||
ifh = self.opener(self.indexfile, "a")
|
||||
dfh.write(cdelta)
|
||||
ifh.write(struct.pack(self.indexformat, *e))
|
||||
|
||||
t, r, chain, prev = r, r + 1, node, node
|
||||
base = self.base(t)
|
||||
@ -915,9 +1006,12 @@ class revlog(object):
|
||||
|
||||
# first truncate the files on disk
|
||||
end = self.start(rev)
|
||||
df = self.opener(self.datafile, "a")
|
||||
df.truncate(end)
|
||||
end = rev * struct.calcsize(self.indexformat)
|
||||
if not self.inlinedata():
|
||||
df = self.opener(self.datafile, "a")
|
||||
df.truncate(end)
|
||||
end = rev * struct.calcsize(self.indexformat)
|
||||
else:
|
||||
end += rev * struct.calcsize(self.indexformat)
|
||||
|
||||
indexf = self.opener(self.indexfile, "a")
|
||||
indexf.truncate(end)
|
||||
@ -952,6 +1046,12 @@ class revlog(object):
|
||||
s = struct.calcsize(self.indexformat)
|
||||
i = actual / s
|
||||
di = actual - (i * s)
|
||||
if self.inlinedata():
|
||||
databytes = 0
|
||||
for r in xrange(self.count()):
|
||||
databytes += self.length(r)
|
||||
dd = 0
|
||||
di = actual - self.count() * s - databytes
|
||||
except IOError, inst:
|
||||
if inst.errno != errno.ENOENT:
|
||||
raise
|
||||
|
Loading…
Reference in New Issue
Block a user