mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 15:27:13 +03:00
revlog: use compression engine API for compression
This commit swaps the just-added revlog compressor API into the revlog class.

Instead of implementing zlib compression inline in compress(), we now store a cached-on-first-use revlog compressor on each revlog instance and invoke its "compress()" method.

As part of this, revlog.compress() has been refactored a bit to use a cleaner code flow and modern formatting (e.g. avoiding parentheses around returned tuples).

On a mozilla-unified repo, here are the "compress" times for a few commands (first line before this change, second line after):

$ hg perfrevlogchunks -c
! wall 5.772450 comb 5.780000 user 5.780000 sys 0.000000 (best of 3)
! wall 5.795158 comb 5.790000 user 5.790000 sys 0.000000 (best of 3)

$ hg perfrevlogchunks -m
! wall 9.975789 comb 9.970000 user 9.970000 sys 0.000000 (best of 3)
! wall 10.019505 comb 10.010000 user 10.010000 sys 0.000000 (best of 3)

Compression times did seem to slow down just a little.

There are 360,210 changelog revisions and 359,342 manifest revisions. For the changelog, mean time to compress a revision increased from ~16.025us to ~16.088us. That's basically a function call or an attribute lookup. I suppose this is the price you pay for abstraction. It's so low that I'm not concerned.
This commit is contained in:
parent
29c30e4b7e
commit
24c1205d69
@ -39,7 +39,6 @@ from . import (
|
||||
|
||||
# Short module-level aliases for the struct and zlib entry points, bound once
# at import time so the rest of the module can call them by their short names.
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
|
||||
|
||||
# revlog header flags
|
||||
@ -341,6 +340,10 @@ class revlog(object):
|
||||
# revnum -> (chain-length, sum-delta-length)
|
||||
self._chaininfocache = {}
|
||||
|
||||
@util.propertycache
def _compressor(self):
    """Return the revlog compressor provided by the 'zlib' engine.

    The engine is looked up in util.compengines and asked for a fresh
    revlog compressor. Because this is a util.propertycache attribute,
    the compressor is meant to be built on first use and then reused by
    this revlog instance.
    """
    zlibengine = util.compengines['zlib']
    return zlibengine.revlogcompressor()
|
||||
|
||||
def tip(self):
    """Return the node of the tip revision.

    The tip revision number is taken to be ``len(self.index) - 2`` and is
    resolved through ``self.node()``.
    """
    # NOTE(review): the "- 2" presumably skips a trailing sentinel entry
    # kept at the end of the index -- confirm against the index layout.
    tiprev = len(self.index) - 2
    return self.node(tiprev)
|
||||
def __contains__(self, rev):
|
||||
@ -1465,34 +1468,20 @@ class revlog(object):
|
||||
dfh.close()
|
||||
ifh.close()
|
||||
|
||||
def compress(self, text):
    """Generate a possibly-compressed representation of text.

    Returns a ``(header, data)`` pair:

    * ``("", compressed)`` when zlib output is strictly smaller than text;
    * ``("", text)`` when text is empty, or is stored as-is and already
      starts with a NUL byte;
    * ``('u', text)`` when text is stored as-is otherwise.

    Inputs shorter than 44 bytes are never handed to zlib.
    """
    if not text:
        return ("", text)

    size = len(text)
    packed = None
    if size > 1000000:
        # zlib makes an internal copy of its input, doubling memory usage
        # for large texts, so feed it 1 MiB slices instead.
        chunksize = 2**20
        compressor = zlib.compressobj()
        pieces = [compressor.compress(text[start:start + chunksize])
                  for start in range(0, size, chunksize)]
        pieces.append(compressor.flush())
        # Only keep the result when it actually saves space.
        if sum(map(len, pieces)) < size:
            packed = "".join(pieces)
    elif size >= 44:
        packed = _compress(text)

    if packed is not None and len(packed) < size:
        return ("", packed)

    # Stored uncompressed: a leading NUL needs no extra marker, anything
    # else is tagged with 'u'.
    if text[0] == '\0':
        return ("", text)
    return ('u', text)
|
||||
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, payload)`` pair. Empty input is returned
    untouched with an empty header. Otherwise the cached revlog
    compressor is asked to compress the data; a truthy result is returned
    with an empty header. A falsy result means the data is stored as-is,
    tagged with ``'u'`` unless it already begins with a NUL byte.
    """
    if not data:
        return '', data

    packed = self._compressor.compress(data)
    if packed:
        # The revlog compressor added the header in the returned data.
        return '', packed

    # Stored uncompressed: a leading NUL needs no marker of its own.
    marker = '' if data[0] == '\0' else 'u'
    return marker, data
|
||||
|
||||
def decompress(self, data):
|
||||
"""Decompress a revlog chunk.
|
||||
|
Loading…
Reference in New Issue
Block a user