mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 07:17:55 +03:00
Optimize manifest.add
Testing shows that manifest.add is spending a significant percentage of its time running calcoffsets and doing text = "".join(addlist). This patch removes the need for both of these by storying the manifest in a character array, and using a modified bisect search to find lines without the help of a separate index of line offsets. manifest.add was also reworked to push delta construction/combination into the main loop. Time to apply 2751 patches (without psyco, ext3 noatime,data=writeback): Stock hg: 4m45s real 3m32s user 55s sys patched: 2m48s real 1m53s user 43s sys quilt: 2m30s real 45s user 50s sys (quilt does much more io...)
This commit is contained in:
parent
4813e5d3d3
commit
5bb76dbbf2
@ -9,13 +9,12 @@ import sys, struct
|
||||
from revlog import *
|
||||
from i18n import gettext as _
|
||||
from demandload import *
|
||||
demandload(globals(), "bisect")
|
||||
demandload(globals(), "bisect array")
|
||||
|
||||
class manifest(revlog):
|
||||
def __init__(self, opener):
|
||||
self.mapcache = None
|
||||
self.listcache = None
|
||||
self.addlist = None
|
||||
revlog.__init__(self, opener, "00manifest.i", "00manifest.d")
|
||||
|
||||
def read(self, node):
|
||||
@ -25,8 +24,9 @@ class manifest(revlog):
|
||||
text = self.revision(node)
|
||||
map = {}
|
||||
flag = {}
|
||||
self.listcache = (text, text.splitlines(1))
|
||||
for l in self.listcache[1]:
|
||||
self.listcache = array.array('c', text)
|
||||
lines = text.splitlines(1)
|
||||
for l in lines:
|
||||
(f, n) = l.split('\0')
|
||||
map[f] = bin(n[:40])
|
||||
flag[f] = (n[40:-1] == "x")
|
||||
@ -39,57 +39,67 @@ class manifest(revlog):
|
||||
self.read(node)
|
||||
return self.mapcache[2]
|
||||
|
||||
def diff(self, a, b):
|
||||
return mdiff.textdiff(str(a), str(b))
|
||||
|
||||
def add(self, map, flags, transaction, link, p1=None, p2=None,
|
||||
changed=None):
|
||||
# directly generate the mdiff delta from the data collected during
|
||||
# the bisect loop below
|
||||
def gendelta(delta):
|
||||
i = 0
|
||||
result = []
|
||||
while i < len(delta):
|
||||
start = delta[i][2]
|
||||
end = delta[i][3]
|
||||
l = delta[i][4]
|
||||
if l == None:
|
||||
l = ""
|
||||
while i < len(delta) - 1 and start <= delta[i+1][2] \
|
||||
and end >= delta[i+1][2]:
|
||||
if delta[i+1][3] > end:
|
||||
end = delta[i+1][3]
|
||||
if delta[i+1][4]:
|
||||
l += delta[i+1][4]
|
||||
|
||||
# returns a tuple (start, end). If the string is found
|
||||
# m[start:end] are the line containing that string. If start == end
|
||||
# the string was not found and they indicate the proper sorted
|
||||
# insertion point. This was taken from bisect_left, and modified
|
||||
# to find line start/end as it goes along.
|
||||
#
|
||||
# m should be a buffer or a string
|
||||
# s is a string
|
||||
#
|
||||
def manifestsearch(m, s, lo=0, hi=None):
|
||||
def advance(i, c):
|
||||
while i < lenm and m[i] != c:
|
||||
i += 1
|
||||
result.append(struct.pack(">lll", start, end, len(l)) + l)
|
||||
i += 1
|
||||
return result
|
||||
return i
|
||||
lenm = len(m)
|
||||
if not hi:
|
||||
hi = lenm
|
||||
while lo < hi:
|
||||
mid = (lo + hi) // 2
|
||||
start = mid
|
||||
while start > 0 and m[start-1] != '\n':
|
||||
start -= 1
|
||||
end = advance(start, '\0')
|
||||
if m[start:end] < s:
|
||||
# we know that after the null there are 40 bytes of sha1
|
||||
# this translates to the bisect lo = mid + 1
|
||||
lo = advance(end + 40, '\n') + 1
|
||||
else:
|
||||
# this translates to the bisect hi = mid
|
||||
hi = start
|
||||
end = advance(lo, '\0')
|
||||
found = m[lo:end]
|
||||
if cmp(s, found) == 0:
|
||||
# we know that after the null there are 40 bytes of sha1
|
||||
end = advance(end + 40, '\n')
|
||||
return (lo, end+1)
|
||||
else:
|
||||
return (lo, lo)
|
||||
|
||||
# apply the changes collected during the bisect loop to our addlist
|
||||
def addlistdelta(addlist, delta):
|
||||
# apply the deltas to the addlist. start from the bottom up
|
||||
# return a delta suitable for addrevision
|
||||
def addlistdelta(addlist, x):
|
||||
# start from the bottom up
|
||||
# so changes to the offsets don't mess things up.
|
||||
i = len(delta)
|
||||
i = len(x)
|
||||
while i > 0:
|
||||
i -= 1
|
||||
start = delta[i][0]
|
||||
end = delta[i][1]
|
||||
if delta[i][4]:
|
||||
addlist[start:end] = [delta[i][4]]
|
||||
start = x[i][0]
|
||||
end = x[i][1]
|
||||
if x[i][2]:
|
||||
addlist[start:end] = array.array('c', x[i][2])
|
||||
else:
|
||||
del addlist[start:end]
|
||||
return addlist
|
||||
|
||||
# calculate the byte offset of the start of each line in the
|
||||
# manifest
|
||||
def calcoffsets(addlist):
|
||||
offsets = [0] * (len(addlist) + 1)
|
||||
offset = 0
|
||||
i = 0
|
||||
while i < len(addlist):
|
||||
offsets[i] = offset
|
||||
offset += len(addlist[i])
|
||||
i += 1
|
||||
offsets[i] = offset
|
||||
return offsets
|
||||
return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2] \
|
||||
for d in x ])
|
||||
|
||||
# if we're using the listcache, make sure it is valid and
|
||||
# parented by the same node we're diffing against
|
||||
@ -98,15 +108,13 @@ class manifest(revlog):
|
||||
files = map.keys()
|
||||
files.sort()
|
||||
|
||||
self.addlist = ["%s\000%s%s\n" %
|
||||
text = ["%s\000%s%s\n" %
|
||||
(f, hex(map[f]), flags[f] and "x" or '')
|
||||
for f in files]
|
||||
self.listcache = array.array('c', "".join(text))
|
||||
cachedelta = None
|
||||
else:
|
||||
addlist = self.listcache[1]
|
||||
|
||||
# find the starting offset for each line in the add list
|
||||
offsets = calcoffsets(addlist)
|
||||
addlist = self.listcache
|
||||
|
||||
# combine the changed lists into one list for sorting
|
||||
work = [[x, 0] for x in changed[0]]
|
||||
@ -114,45 +122,52 @@ class manifest(revlog):
|
||||
work.sort()
|
||||
|
||||
delta = []
|
||||
bs = 0
|
||||
dstart = None
|
||||
dend = None
|
||||
dline = [""]
|
||||
start = 0
|
||||
# zero copy representation of addlist as a buffer
|
||||
addbuf = buffer(addlist)
|
||||
|
||||
# start with a readonly loop that finds the offset of
|
||||
# each line and creates the deltas
|
||||
for w in work:
|
||||
f = w[0]
|
||||
# bs will either be the index of the item or the insert point
|
||||
bs = bisect.bisect(addlist, f, bs)
|
||||
if bs < len(addlist):
|
||||
fn = addlist[bs][:addlist[bs].index('\0')]
|
||||
else:
|
||||
fn = None
|
||||
start, end = manifestsearch(addbuf, f, start)
|
||||
if w[1] == 0:
|
||||
l = "%s\000%s%s\n" % (f, hex(map[f]),
|
||||
flags[f] and "x" or '')
|
||||
else:
|
||||
l = None
|
||||
start = bs
|
||||
if fn != f:
|
||||
# item not found, insert a new one
|
||||
end = bs
|
||||
if w[1] == 1:
|
||||
raise AssertionError(
|
||||
l = ""
|
||||
if start == end and w[1] == 1:
|
||||
# item we want to delete was not found, error out
|
||||
raise AssertionError(
|
||||
_("failed to remove %s from manifest\n") % f)
|
||||
if dstart != None and dstart <= start and dend >= start:
|
||||
if dend < end:
|
||||
dend = end
|
||||
if l:
|
||||
dline.append(l)
|
||||
else:
|
||||
# item is found, replace/delete the existing line
|
||||
end = bs + 1
|
||||
delta.append([start, end, offsets[start], offsets[end], l])
|
||||
if dstart != None:
|
||||
delta.append([dstart, dend, "".join(dline)])
|
||||
dstart = start
|
||||
dend = end
|
||||
dline = [l]
|
||||
|
||||
self.addlist = addlistdelta(addlist, delta)
|
||||
if self.mapcache[0] == self.tip():
|
||||
cachedelta = "".join(gendelta(delta))
|
||||
else:
|
||||
if dstart != None:
|
||||
delta.append([dstart, dend, "".join(dline)])
|
||||
# apply the delta to the addlist, and get a delta for addrevision
|
||||
cachedelta = addlistdelta(addlist, delta)
|
||||
|
||||
# the delta is only valid if we've been processing the tip revision
|
||||
if self.mapcache[0] != self.tip():
|
||||
cachedelta = None
|
||||
self.listcache = addlist
|
||||
|
||||
text = "".join(self.addlist)
|
||||
if cachedelta and mdiff.patch(self.listcache[0], cachedelta) != text:
|
||||
raise AssertionError(_("manifest delta failure\n"))
|
||||
n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
|
||||
n = self.addrevision(buffer(self.listcache), transaction, link, p1, \
|
||||
p2, cachedelta)
|
||||
self.mapcache = (n, map, flags)
|
||||
self.listcache = (text, self.addlist)
|
||||
self.addlist = None
|
||||
|
||||
return n
|
||||
|
Loading…
Reference in New Issue
Block a user