#!/usr/bin/env python
# cacheclient.py - example cache client implementation
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# The remotefilelog extension can optionally use a caching layer to serve
# file revision requests. This is an example implementation that uses
# the python-memcached library: https://pypi.python.org/pypi/python-memcached/
# A better implementation would make all of the requests non-blocking.

import os
import sys

import memcache

stdin = sys.stdin
stdout = sys.stdout
stderr = sys.stderr

mc = None
keyprefix = None
cachepath = None

# Max number of keys per request
batchsize = 1000

# Max value size per key (in bytes)
maxsize = 512 * 1024
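# (memcached's default maximum item size is 1 MB, so 512 KiB chunks leave
# comfortable headroom; this assumes a stock server configuration.)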

def readfile(path):
    f = open(path, "r")
    try:
        return f.read()
    finally:
        f.close()

def writefile(path, content):
    dirname = os.path.dirname(path)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    f = open(path, "w")
    try:
        f.write(content)
    finally:
        f.close()

def compress(value):
    # Real world implementations will want to compress values.
    # Insert your favorite compression here, ex:
    # return lz4.compressHC(value)
    return value

def decompress(value):
    # Real world implementations will want to decompress values.
    # Insert your favorite decompression here, ex:
    # return lz4.decompress(value)
    return value
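
# A minimal sketch of the two hooks above, with zlib from the standard
# library standing in for the lz4 suggested in the comments:
#
#   import zlib
#
#   def compress(value):
#       return zlib.compress(value)
#
#   def decompress(value):
#       return zlib.decompress(value)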

def generateKey(id):
    return keyprefix + id

def generateId(key):
    return key[len(keyprefix):]

def getKeys():
    raw = stdin.readline()[:-1]
    keycount = int(raw)

    keys = []
    for i in range(keycount):
        id = stdin.readline()[:-1]
        keys.append(generateKey(id))

    results = mc.get_multi(keys)

    hits = 0
    for i, key in enumerate(keys):
        value = results.get(key)
        id = generateId(key)
        # On hit, write to disk
        if value:
            # Integer hit indicates a large file
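            # (e.g. a stored value of 3 under key "p:foo" means the data
            # lives in chunk keys "p:foo0", "p:foo1", and "p:foo2"; the
            # prefix "p:" is illustrative. See the chunking in setKeys.)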
            if isinstance(value, int):
                largekeys = [key + str(j) for j in range(value)]
                largevalues = mc.get_multi(largekeys)
                if len(largevalues) == value:
                    value = ""
                    for largekey in largekeys:
                        value += largevalues[largekey]
                else:
                    # A chunk is missing, give up
                    stdout.write(id + "\n")
                    stdout.flush()
                    continue
            path = os.path.join(cachepath, id)
            value = decompress(value)
            writefile(path, value)
            hits += 1
        else:
            # On miss, report to caller
            stdout.write(id + "\n")
            stdout.flush()

        if i % 500 == 0:
            stdout.write("_hits_%s_\n" % hits)
            stdout.flush()

    # done signal
    stdout.write("0\n")
    stdout.flush()

def setKeys():
    raw = stdin.readline()[:-1]
    keycount = int(raw)

    values = {}
    for i in range(keycount):
        id = stdin.readline()[:-1]
        path = os.path.join(cachepath, id)

        value = readfile(path)
        value = compress(value)

        key = generateKey(id)
        if len(value) > maxsize:
            # split up large files
            start = 0
            chunkcount = 0
            while start < len(value):
                end = min(len(value), start + maxsize)
                values[key + str(chunkcount)] = value[start:end]
                start += maxsize
                chunkcount += 1

            # Large files are stored as an integer representing how many
            # chunks they are broken into.
            value = chunkcount
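            # Worked example: with maxsize = 512 KiB (524,288 bytes), a
            # 1,300,000-byte value is stored as chunks of 524,288, 524,288,
            # and 251,424 bytes, and the base key maps to the integer 3.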

        values[key] = value

        if len(values) == batchsize:
            mc.set_multi(values)
            values = {}

    if values:
        mc.set_multi(values)

def main(argv=None):
    """
    remotefilelog uses this cacheclient by setting it in the repo config:

        [remotefilelog]
        cacheprocess = cacheclient <ip address> <memcache prefix>

    When memcache requests need to be made, it will execute this process
    with the following arguments:

        cacheclient <ip address> <memcache prefix><internal prefix> <cache path>

    Communication happens via stdin and stdout. To make a get request,
    the following is written to stdin:

        get\n
        <key count>\n
        <key1>\n
        <key...>\n
        <keyN>\n

    The results of any cache hits will be written directly to
    <cache path>/<key>. Any cache misses will be written to stdout in the
    form <key>\n. Once all hits and misses are finished, 0\n will be written
    to stdout to signal completion.

    During the request, progress may be reported via stdout with the format
    _hits_###_\n where ### is an integer representing the number of hits so
    far. remotefilelog uses this to display a progress bar.

    A single cacheclient process may be used for multiple requests (though
    not in parallel), so it stays open until it receives exit\n via stdin.
    """
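    # A hypothetical session, typed by hand (lines marked "->" are written
    # by this process; the prefix "p:" and the ids are made up):
    #
    #   $ ./cacheclient.py 127.0.0.1:11211 p: /tmp/cache
    #   get
    #   2
    #   1111              (a hit in memcache)
    #   2222              (a miss)
    #   -> _hits_1_       (progress after the first key)
    #   -> 2222           (the miss, reported back)
    #   -> 0              (done; the hit was written to /tmp/cache/1111)
    #   exit
    #
    # set requests use the same framing ("set", a count, then ids); the
    # values are read from <cache path>/<id> rather than echoed back.
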
    if argv is None:
        argv = sys.argv

    global cachepath
    global keyprefix
    global mc

    ip = argv[1]
    keyprefix = argv[2]
    cachepath = argv[3]
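
    # python-memcached accepts servers as "host:port" strings, so despite
    # the name, ip can be e.g. "127.0.0.1:11211".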
    mc = memcache.Client([ip], debug=0)

    while True:
        cmd = stdin.readline()[:-1]
        if cmd == "get":
            getKeys()
        elif cmd == "set":
            setKeys()
        elif cmd == "exit":
            return 0
        else:
            stderr.write("Invalid Command %s\n" % cmd)
            return 1

if __name__ == "__main__":
    sys.exit(main())