remotefilelog: enforce a limit on the packfiles size
Summary: Once `remotefilelog.fetchpacks` is enabled, `hg gc` will no longer be able to limit the size of the hgcache. This is particularly challenging for Sandcastle/Quicksand, which already see hgcaches over 100GB. The long-term plan is to switch to IndexedLog-based stores with log-rotate functionality to control the cache size. In the meantime, we can implement basic logic that enforces the size of the hgcache by simply removing packfiles once the cache exceeds the configured size.

One complication of this method is that several concurrent Mercurial processes could be running and accessing the packfiles being removed. We can split the packfiles into two categories: ones created a while back, and new ones. When a packfile from the first category is removed, lookups will simply raise a KeyError and the data will be re-fetched from Memcache/Mononoke, i.e. failure is acceptable. The second category contains packfiles that were just created by downloading them from Memcache/Mononoke, and the code strongly assumes that they will stick around; a failure at this point cannot be recovered from. One way of fixing this would be to handle these failures properly and simply retry; the other is to not remove new packfiles. An age of 10 minutes was chosen to separate the two categories.

Reviewed By: quark-zju

Differential Revision: D15014076

fbshipit-source-id: 014eea0251ea3a630aaaa75759cd492271a5c5cd
This commit is contained in:
parent 94257a258b
commit baacd19749
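Distilled, the eviction policy described in the summary looks roughly like the sketch below. This is a minimal illustration with hypothetical names (`evict_old_packs`, `grace_seconds`), not the patch itself; the real implementation is `_cleanupoldpacks` in the diff that follows.

    # Minimal sketch of the policy: remove packfiles oldest-first until the
    # cache fits under the limit, but never touch files younger than the
    # grace window. Hypothetical helper, for illustration only.
    import os
    import time

    def evict_old_packs(packdir, limit_bytes, grace_seconds=10 * 60):
        # Collect packfiles, oldest first.
        paths = [
            os.path.join(packdir, f)
            for f in os.listdir(packdir)
            if f.endswith("pack")
        ]
        paths.sort(key=lambda p: os.lstat(p).st_mtime)
        total = sum(os.lstat(p).st_size for p in paths)
        for p in paths:
            if total <= limit_bytes:
                break
            st = os.lstat(p)
            # Never remove packfiles younger than the grace window: a
            # concurrent process may have just downloaded them and expects
            # them to be present on disk.
            if time.time() - st.st_mtime < grace_seconds:
                break
            os.unlink(p)
            total -= st.st_size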
@@ -103,6 +103,15 @@ Configs:

     The default is true, but this may make some operations cause many tree
     fetches when used in conjunction with treemanifest in treeonly mode.

+    ``remotefilelog.cleanoldpacks`` controls whether repack will attempt to
+    limit the size of its cache.
+
+    ``remotefilelog.cachelimit`` limits the size of the hgcache to this size.
+    Packfiles will be removed from oldest to newest during repack.
+
+    ``remotefilelog.manifestlimit`` limits the size of the manifest cache to
+    this size. Manifests will be removed from oldest to newest during repack.
+
     ``format.userustdatapack`` switches to using the rust data pack
     implementation.

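For illustration, a repository could opt in via its hgrc; the snippet is a sketch rather than required configuration, and the values shown are the defaults the code below falls back to:

    [remotefilelog]
    cleanoldpacks = True
    cachelimit = 10GB
    manifestlimit = 2GB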
@@ -150,7 +150,8 @@ def _shareddatastorespythonrepack(repo, options, packpath, incremental):
 def _shareddatastoresrepack(repo, options, incremental):
     if util.safehasattr(repo.fileslog, "shareddatastores"):
         packpath = shallowutil.getcachepackpath(repo, constants.FILEPACK_CATEGORY)
-        _cleanuptemppacks(repo.ui, packpath)
+        limit = repo.ui.configbytes("remotefilelog", "cachelimit", "10GB")
+        _cleanuppacks(repo.ui, packpath, limit)

         if _userustrepack(repo):
             _runrustrepack(
@@ -194,7 +195,7 @@ def _localdatarepack(repo, options, incremental):
     packpath = shallowutil.getlocalpackpath(
         repo.svfs.vfs.base, constants.FILEPACK_CATEGORY
     )
-    _cleanuptemppacks(repo.ui, packpath)
+    _cleanuppacks(repo.ui, packpath, 0)

     if _userustrepack(repo):
         _runrustrepack(repo, options, packpath, incremental, _localdatapythonrepack)
@@ -239,7 +240,12 @@ def _manifestrepack(repo, options, incremental):
     spackpath, sdstores, shstores = shareddata

     def _domanifestrepack(packpath, dstores, hstores, shared):
-        _cleanuptemppacks(repo.ui, packpath)
+        limit = (
+            repo.ui.configbytes("remotefilelog", "manifestlimit", "2GB")
+            if shared
+            else 0
+        )
+        _cleanuppacks(repo.ui, packpath, limit)
         if _userustrepack(repo):
             _runrustrepack(
                 repo,
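The limits above are read with `ui.configbytes`, which accepts human-readable sizes such as "10GB" and "2GB". As a rough illustration of that kind of parsing (a hypothetical re-implementation for this note, not Mercurial's actual code):

    import re

    # Hypothetical sketch of human-readable size parsing; ui.configbytes is
    # the real source of truth.
    _UNITS = {"": 1, "b": 1, "kb": 1 << 10, "mb": 1 << 20, "gb": 1 << 30}

    def parsebytes(value):
        m = re.match(r"^\s*(\d+(?:\.\d+)?)\s*([a-z]*)\s*$", value.lower())
        if not m or m.group(2) not in _UNITS:
            raise ValueError("invalid byte size: %r" % value)
        return int(float(m.group(1)) * _UNITS[m.group(2)])

    # e.g. parsebytes("10GB") -> 10737418240, parsebytes("2GB") -> 2147483648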
@@ -672,6 +678,70 @@ def _cleanuptemppacks(ui, packpath):
             raise


+def _cleanupoldpacks(ui, packpath, limit):
+    """Enforce a size limit on the cache. Packfiles will be removed from
+    oldest to newest, on the assumption that old packfiles contain less
+    useful data than new ones.
+    """
+    with progress.spinner(ui, _("cleaning old packs")):
+
+        def _mtime(f):
+            stat = os.lstat(f)
+            return stat.st_mtime
+
+        def _listpackfiles(path):
+            packs = []
+            for f in os.listdir(path):
+                _, ext = os.path.splitext(f)
+                if ext.endswith("pack"):
+                    packs.append(os.path.join(packpath, f))
+
+            return packs
+
+        files = sorted(_listpackfiles(packpath), key=_mtime, reverse=True)
+
+        cachesize = 0
+        for f in files:
+            stat = os.lstat(f)
+            cachesize += stat.st_size
+
+        while cachesize > limit:
+            f = files.pop()
+            stat = os.lstat(f)
+
+            # Don't remove files that are newer than 10 minutes. This
+            # avoids a race condition where Mercurial downloads files from
+            # the network and expects these to be present on disk. If the
+            # 'limit' is properly set, we should have removed enough files
+            # that this condition won't matter.
+            if time.gmtime(stat.st_mtime + 10 * 60) > time.gmtime():
+                return
+
+            root, ext = os.path.splitext(f)
+            try:
+                if ext == datapack.PACKSUFFIX:
+                    os.unlink(root + datapack.INDEXSUFFIX)
+                else:
+                    os.unlink(root + historypack.INDEXSUFFIX)
+            except OSError as ex:
+                if ex.errno != errno.ENOENT:
+                    raise
+
+            try:
+                os.unlink(f)
+            except OSError as ex:
+                if ex.errno != errno.ENOENT:
+                    raise
+
+            cachesize -= stat.st_size
+
+
+def _cleanuppacks(ui, packpath, limit):
+    _cleanuptemppacks(ui, packpath)
+    if ui.configbool("remotefilelog", "cleanoldpacks"):
+        if limit != 0:
+            _cleanupoldpacks(ui, packpath, limit)
+
+
 class repacker(object):
     """Class for orchestrating the repack of data and history information into a
     new format.
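One subtlety in `_cleanupoldpacks`: the 10-minute check compares `time.struct_time` values, which order lexicographically by field (year, month, day, ...), so at one-second granularity it behaves like comparing raw epoch timestamps. A standalone sketch of the same check, with a hypothetical `is_too_new` name:

    import os
    import time

    def is_too_new(path, grace_seconds=10 * 60):
        # Mirrors the check above: True if `path` was modified within the
        # grace window. struct_time tuples compare field-by-field, which at
        # one-second granularity matches comparing epoch seconds directly.
        mtime = os.lstat(path).st_mtime
        return time.gmtime(mtime + grace_seconds) > time.gmtime()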
55 tests/test-fb-hgext-remotefilelog-repack-remove-old.t (new file)
@@ -0,0 +1,55 @@

  $ . "$TESTDIR/library.sh"

  $ cat >> $HGRCPATH <<EOF
  > [format]
  > userustdatapack=True
  > [remotefilelog]
  > fetchpacks=True
  > EOF

  $ hginit master
  $ cd master
  $ cat >> .hg/hgrc <<EOF
  > [remotefilelog]
  > server=True
  > serverexpiration=-1
  > EOF
  $ echo x > x
  $ hg commit -qAm x
  $ echo x >> x
  $ hg commit -qAm x2
  $ cd ..

  $ hgcloneshallow ssh://user@dummy/master shallow -q
  1 files fetched over 1 fetches - (1 misses, 0.00% hit ratio) over *s (glob)

  $ cd shallow
  $ cat >> .hg/hgrc <<EOF
  > [remotefilelog]
  > userustrepack=True
  > EOF
  $ cd ..

  $ cd shallow
  $ find $CACHEDIR | sort
  $TESTTMP/hgcache
  $TESTTMP/hgcache/master
  $TESTTMP/hgcache/master/packs
  $TESTTMP/hgcache/master/packs/276d308429d0303762befa376788300f0310f90e.histidx
  $TESTTMP/hgcache/master/packs/276d308429d0303762befa376788300f0310f90e.histpack
  $TESTTMP/hgcache/master/packs/887690f1138ae5b99c50d754ed02262874bf8ecb.dataidx
  $TESTTMP/hgcache/master/packs/887690f1138ae5b99c50d754ed02262874bf8ecb.datapack

  $ touch -m -t 200001010000 $TESTTMP/hgcache/master/packs/887690f1138ae5b99c50d754ed02262874bf8ecb.datapack

# Clean up the old packfiles that push the cache over the limit
  $ hg repack --config remotefilelog.cleanoldpacks=True --config remotefilelog.cachelimit="10B"

  $ find $CACHEDIR | sort
  $TESTTMP/hgcache
  $TESTTMP/hgcache/master
  $TESTTMP/hgcache/master/packs
  $TESTTMP/hgcache/master/packs/276d308429d0303762befa376788300f0310f90e.histidx
  $TESTTMP/hgcache/master/packs/276d308429d0303762befa376788300f0310f90e.histpack
  $TESTTMP/hgcache/master/packs/repacklock