2014-11-19 21:10:03 +03:00
|
|
|
# simplecache.py - cache slow things locally so they are fast the next time
|
|
|
|
#
|
|
|
|
# Copyright 2014 Facebook, Inc.
|
|
|
|
#
|
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
|
|
|
|
"""
|
|
|
|
simplecache is a dirt-simple cache of various functions that get slow in large
|
|
|
|
repositories. It is aimed at speeding up common operations that programmers
|
|
|
|
often take, like diffing two revisions (e.g., hg export).
|
|
|
|
|
|
|
|
Currently we cache the full results of these functions:
|
|
|
|
copies.pathcopies (a dictionary)
|
|
|
|
context.basectx._buildstatus (a scmutil.status object -- a tuple of lists)
|
2018-01-23 23:09:23 +03:00
|
|
|
|
|
|
|
You can disable its debug statements (defaults to 'on' except in tests)::
|
|
|
|
|
|
|
|
[simplecache]
|
|
|
|
showdebug = False
|
2014-11-19 21:10:03 +03:00
|
|
|
"""
|
|
|
|
|
2015-06-12 01:29:43 +03:00
|
|
|
import socket, json, random, os, tempfile
|
2018-01-23 23:09:23 +03:00
|
|
|
from mercurial import (
|
|
|
|
context,
|
|
|
|
copies,
|
|
|
|
encoding,
|
|
|
|
extensions,
|
|
|
|
node,
|
2018-02-05 20:07:10 +03:00
|
|
|
pycompat,
|
2018-01-23 23:09:23 +03:00
|
|
|
)
|
2018-01-24 02:57:00 +03:00
|
|
|
from mercurial.node import (
|
|
|
|
nullid,
|
|
|
|
wdirid
|
|
|
|
)
|
2014-11-19 21:10:03 +03:00
|
|
|
from mercurial.scmutil import status
|
|
|
|
|
2016-11-29 16:24:07 +03:00
|
|
|
# Compatibility statement read by Mercurial's extension loader;
# 'ships-with-fb-hgext' marks extensions maintained in this repository.
testedwith = 'ships-with-fb-hgext'
|
2014-11-19 21:10:03 +03:00
|
|
|
|
2018-01-24 02:57:00 +03:00
|
|
|
# context nodes that are special and should not be cached.
# These identify mutable/virtual states (working directory, null
# revision), so results keyed on them could go stale.
UNCACHEABLE_NODES = [
    None, # repo[None].node() returns this
    nullid,
    wdirid
]
|
|
|
|
|
flake8: enable F821 check
Summary:
This check is useful and detects real errors (ex. fbconduit). Unfortunately
`arc lint` will run it with both py2 and py3 so a lot of py2 builtins will
still be warned.
I didn't find a clean way to disable py3 check. So this diff tries to fix them.
For `xrange`, the change was done by a script:
```
import sys
import redbaron
headertypes = {'comment', 'endl', 'from_import', 'import', 'string',
'assignment', 'atomtrailers'}
xrangefix = '''try:
xrange(0)
except NameError:
xrange = range
'''
def isxrange(x):
try:
return x[0].value == 'xrange'
except Exception:
return False
def main(argv):
for i, path in enumerate(argv):
print('(%d/%d) scanning %s' % (i + 1, len(argv), path))
content = open(path).read()
try:
red = redbaron.RedBaron(content)
except Exception:
print(' warning: failed to parse')
continue
hasxrange = red.find('atomtrailersnode', value=isxrange)
hasxrangefix = 'xrange = range' in content
if hasxrangefix or not hasxrange:
print(' no need to change')
continue
# find a place to insert the compatibility statement
changed = False
for node in red:
if node.type in headertypes:
continue
# node.insert_before is an easier API, but it has bugs changing
# other "finally" and "except" positions. So do the insert
# manually.
# # node.insert_before(xrangefix)
line = node.absolute_bounding_box.top_left.line - 1
lines = content.splitlines(1)
content = ''.join(lines[:line]) + xrangefix + ''.join(lines[line:])
changed = True
break
if changed:
# "content" is faster than "red.dumps()"
open(path, 'w').write(content)
print(' updated')
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
```
For other py2 builtins that do not have a py3 equivalent, some `# noqa`
were added as a workaround for now.
Reviewed By: DurhamG
Differential Revision: D6934535
fbshipit-source-id: 546b62830af144bc8b46788d2e0fd00496838939
2018-02-10 04:31:44 +03:00
|
|
|
# Python 3 compatibility shim: xrange was removed in py3, so alias it
# to range when it is missing (used by the cache-eviction loop below).
try:
    xrange(0)
except NameError:
    xrange = range
|
|
|
|
|
2014-11-19 21:10:03 +03:00
|
|
|
def extsetup(ui):
    """Extension setup hook.

    Wrap the two functions we know how to cache so their results are
    routed through memoize() (see pathcopiesui and buildstatusui).
    """
    extensions.wrapfunction(copies, 'pathcopies', pathcopiesui(ui))
    extensions.wrapfunction(context.basectx, '_buildstatus', buildstatusui(ui))
|
|
|
|
|
|
|
|
def getmcsock(ui):
    """
    Return a socket opened up to talk to localhost mcrouter.

    Host and port come from the [simplecache] host/port config options,
    defaulting to localhost:11101.
    """
    addr = (ui.config('simplecache', 'host', default='localhost'),
            int(ui.config('simplecache', 'port', default=11101)))
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect(addr)
    return sock
|
|
|
|
|
|
|
|
def mcget(key, ui):
    """
    Use local mcrouter to get a key from memcache

    Returns the cached string on a hit, or None on a miss.
    """
    if type(key) != str:
        raise ValueError('Key must be a string')
    s = getmcsock(ui)
    # namespace the key so we do not collide with other mcrouter users
    key = 'cca.hg.%s' % key
    s.sendall('get %s\r\n' % key)
    # Parse the memcached text protocol reply, which is either
    #   END\r\n                                          (miss)
    # or
    #   VALUE <key> <flags> <bytes>\r\n<data>\r\nEND\r\n (hit)
    meta = []
    value = None
    while True:
        char = s.recv(1)
        if char != '\r':
            meta.append(char)
        else:
            meta = ''.join(meta)
            if meta == 'END':
                # miss: no VALUE line at all
                break
            char = s.recv(1) # throw away newline
            _, key, flags, sz = ''.join(meta).strip().split(' ')
            # NOTE(review): a single recv() may legally return fewer
            # than sz bytes for large values -- confirm with callers.
            value = s.recv(int(sz))
            s.recv(7) # throw away \r\nEND\r\n
            break
    s.close()
    return value
|
|
|
|
|
|
|
|
def mcset(key, value, ui):
    """
    Use local mcrouter to set a key to memcache

    Returns True if the server acknowledged the write with STORED.
    """
    if type(key) != str:
        raise ValueError('Key must be a string')
    if type(value) != str:
        raise ValueError('Value must be a string')

    # namespace the key so we do not collide with other mcrouter users
    key = 'cca.hg.%s' % key
    sz = len(value)
    # memcached text protocol: set <key> <flags> <exptime> <bytes>
    tmpl = 'set %s 0 0 %d\r\n%s\r\n'
    s = getmcsock(ui)
    s.sendall(tmpl % (key, sz, value))
    # Read the one-word reply (e.g. STORED) up to the first CR/LF.
    data = []
    while True:
        char = s.recv(1)
        if char not in '\r\n':
            data.append(char)
        else:
            break
    s.close()
    return ''.join(data) == 'STORED'
|
|
|
|
|
|
|
|
class pathcopiesserializer(object):
    """
    Serialize and deserialize the results of calls to copies.pathcopies.
    Results are just dictionaries, so this just uses json.
    """
    @staticmethod
    def serialize(copydict):
        # base64-encode both sides so arbitrary filename bytes survive
        # the round trip through JSON (Python 2 str-to-str codec).
        encoded = dict((k.encode('base64'), v.encode('base64'))
                       for (k, v) in copydict.iteritems())
        return json.dumps(encoded)

    @staticmethod
    def deserialize(string):
        encoded = json.loads(string)
        # invert serialize(): decode each base64 key/value pair
        return dict((k.decode('base64'), v.decode('base64'))
                    for k, v in encoded.iteritems())
|
|
|
|
|
|
|
|
def pathcopiesui(ui):
    """Build a memoizing wrapper for copies.pathcopies.

    The cache is bypassed when either endpoint is an uncacheable node
    (working directory / null) or when a matcher narrows the query.
    """
    def pathcopies(orig, x, y, match=None):
        compute = lambda: orig(x, y, match=match)
        xnode = x.node()
        ynode = y.node()
        cacheable = (xnode not in UNCACHEABLE_NODES and
                     ynode not in UNCACHEABLE_NODES and
                     not match)
        if not cacheable:
            return compute()
        key = 'pathcopies:%s:%s' % (node.hex(xnode), node.hex(ynode))
        return memoize(compute, key, pathcopiesserializer, ui)
    return pathcopies
|
|
|
|
|
|
|
|
class buildstatusserializer(object):
    """
    Serialize and deserialize the results of calls to buildstatus.
    Results are status objects, which extend tuple. Each status object
    has seven lists within it, each containing strings of filenames in
    each type of status.
    """
    @staticmethod
    def serialize(status):
        # NB: the parameter shadows the module-level scmutil.status
        # constructor; that import is only needed by deserialize().
        # base64-encode each filename so arbitrary bytes survive JSON.
        ls = [list(status[i]) for i in range(7)]
        ll = []
        for s in ls:
            ll.append([f.encode('base64') for f in s])
        return json.dumps(ll)

    @staticmethod
    def deserialize(string):
        ll = json.loads(string)
        ls = []
        for l in ll:
            ls.append([f.decode('base64') for f in l])
        # rebuild the scmutil.status tuple from the seven lists
        return status(*ls)
|
|
|
|
|
|
|
|
def buildstatusui(ui):
    """Build a memoizing wrapper for context.basectx._buildstatus.

    Only the plain, unfiltered status query is cached: any matcher,
    extra status class (ignored/clean/unknown), or uncacheable node
    falls through to the original implementation.
    """
    def buildstatus(orig, self, other, status, match, ignored, clean, unknown):
        compute = lambda: orig(self, other, status, match, ignored, clean,
                               unknown)
        if not match.always() or ignored or clean or unknown:
            return compute()
        if (self.node() in UNCACHEABLE_NODES or
            other.node() in UNCACHEABLE_NODES):
            return compute()
        key = 'buildstatus:%s:%s' % (
            node.hex(self.node()), node.hex(other.node()))
        return memoize(compute, key, buildstatusserializer, ui)
    return buildstatus
|
|
|
|
|
2015-06-16 03:19:08 +03:00
|
|
|
class stringserializer(object):
    """Passthrough serializer for values that are already strings.

    Both directions just validate the type and return the argument
    unchanged; raises TypeError for anything that is not a string.
    """
    @staticmethod
    def serialize(input):
        # isinstance (rather than an exact type() comparison) also
        # accepts str subclasses, which behave as plain strings here.
        if not isinstance(input, str):
            raise TypeError("stringserializer can only be used with strings")
        return input

    @staticmethod
    def deserialize(string):
        if not isinstance(string, str):
            raise TypeError("stringserializer can only be used with strings")
        return string
|
|
|
|
|
2015-06-12 01:29:43 +03:00
|
|
|
def localpath(key, ui):
    """Return the filesystem path backing *key* in the local file cache.

    The cache directory comes from [simplecache] cachedir, defaulting
    to <system tempdir>/hgsimplecache.
    """
    cachedir = ui.config('simplecache', 'cachedir')
    if not cachedir:
        cachedir = os.path.join(tempfile.gettempdir(), 'hgsimplecache')
    return os.path.join(cachedir, key)
|
|
|
|
|
|
|
|
def localget(key, ui):
    """Read *key* from the local file cache; return None on any failure.

    Missing files, permission problems, etc. are all treated as cache
    misses -- the cache is strictly best effort.
    """
    try:
        with open(localpath(key, ui)) as cachefile:
            return cachefile.read()
    except Exception:
        return None
|
2014-11-19 21:10:03 +03:00
|
|
|
|
2015-06-12 01:29:43 +03:00
|
|
|
def localset(key, value, ui):
    """Write *value* for *key* into the local file cache (best effort).

    Any failure (permissions, disk full, races with other processes) is
    silently swallowed -- caching is strictly an optimization.  When the
    cache grows past [simplecache] maxcachesize entries, a random
    [simplecache] evictionpercent share of entries is deleted.
    """
    try:
        path = localpath(key, ui)
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        with open(path, 'w') as f:
            f.write(value)

        # If too many entries in cache, delete some.
        tempdirpath = localpath('', ui)
        entries = os.listdir(tempdirpath)
        maxcachesize = ui.configint('simplecache', 'maxcachesize', 2000)
        if len(entries) > maxcachesize:
            # Evict a random sample instead of tracking LRU order.
            random.shuffle(entries)
            evictionpercent = ui.configint('simplecache', 'evictionpercent', 50)
            evictionpercent /= 100.0
            for i in xrange(0, int(len(entries) * evictionpercent)):
                os.remove(os.path.join(tempdirpath, entries[i]))
    except Exception:
        return
|
|
|
|
|
|
|
|
# Registry of cache backends, keyed by the names accepted in the
# [simplecache] caches config option.  Each entry maps to a pair of
# (get, set) callables with signatures (key, ui) and (key, value, ui).
cachefuncs = {
    'local' : (localget, localset),
    'memcache' : (mcget, mcset),
}
|
|
|
|
|
2015-06-16 03:19:08 +03:00
|
|
|
def memoize(func, key, serializer, ui):
    """Look *key* up in the configured caches, falling back to func().

    On a hit the cached string is passed through serializer.deserialize
    and returned.  On a miss, func() is invoked and its serialized
    result is written to every configured cache.  Cache errors are
    never fatal; they are only reported through _debug().
    """
    # Version the key so bumping [simplecache] version invalidates
    # everything at once.
    version = ui.config('simplecache', 'version', default='1')
    key = "%s:v%s" % (key, version)
    if pycompat.iswindows:
        # : is prohibited in Windows filenames, while ! is allowed
        key = key.replace(':', '!')

    cachelist = ui.configlist('simplecache', 'caches', ['local'])

    # Try each cache in order; the first hit wins.
    # (Locals renamed from get/set to avoid shadowing the builtins.)
    for name in cachelist:
        getter, _setter = cachefuncs[name]
        try:
            cacheval = getter(key, ui)
            if cacheval is not None:
                _debug(ui, 'got value for key %s from %s\n' % (key, name))
                value = serializer.deserialize(cacheval)
                return value
        except Exception as inst:
            _debug(ui, 'error getting or deserializing key %s: %s\n'
                   % (key, inst))

    _debug(ui, 'falling back for value %s\n' % (key))
    value = func()

    # Populate every configured cache with the freshly computed value.
    for name in cachelist:
        _getter, setter = cachefuncs[name]
        try:
            setter(key, serializer.serialize(value), ui)
            _debug(ui, 'set value for key %s to %s\n' % (key, name))
        except Exception as inst:
            _debug(ui, 'error setting key %s: %s\n' % (key, inst))

    return value
|
2018-01-23 23:09:23 +03:00
|
|
|
|
|
|
|
def _runningintests():
    # The Mercurial test runner sets TESTTMP in the environment; use its
    # presence as a proxy for "running inside the test suite".
    return 'TESTTMP' in encoding.environ
|
|
|
|
|
|
|
|
def _debug(ui, msg):
    """Emit *msg* via ui.debug unless simplecache debugging is off.

    The [simplecache] showdebug option takes precedence; when it is
    unset, output defaults to on except while running in the test suite.
    """
    enabled = ui.configbool('simplecache', 'showdebug', None)
    if enabled is None:
        enabled = not _runningintests()
    if not enabled:
        return
    ui.debug(msg)
|