# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import hashlib
import itertools
import os
import shutil
import stat
import time

from edenscm.mercurial import error, phases, progress, pycompat, revlog, util
from edenscm.mercurial.i18n import _
from edenscm.mercurial.node import bin, hex

from . import constants, datapack, historypack, shallowutil

try:
    xrange(0)
except NameError:
    xrange = range


# Cache of filename sha to filename, to prevent repeated searches for the same
# filename shas.
filenamehashcache = {}
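# Values are filename strings; a value of None records a hash that could not be
# resolved, so _resolvefilenames will not search the changelog for it again.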


class basestore(object):
    def __init__(self, repo, path, reponame, shared=False):
        """Creates a remotefilelog store object for the given repo name.

        `path` - The file path where this store keeps its data
        `reponame` - The name of the repo. This is used to partition data from
        many repos.
        `shared` - True if this store is a shared cache of data from the central
        server, for many repos on this machine. False means this store is for
        the local data for one repo.
        """
        self.repo = repo
        self.ui = repo.ui
        self._path = path
        self._reponame = reponame
        self._shared = shared
        self._uid = os.getuid() if not pycompat.iswindows else None

        self._validatecachelog = self.ui.config("remotefilelog", "validatecachelog")
        self._validatecache = self.ui.config("remotefilelog", "validatecache", "on")
        self._validatehashes = self.ui.configbool(
            "remotefilelog", "validatecachehashes", True
        )
        if self._validatecache not in ("on", "strict", "off"):
            self._validatecache = "on"
        if self._validatecache == "off":
            self._validatecache = False

        self._mutablepacks = None

        if shared:
            shallowutil.mkstickygroupdir(self.ui, path)
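
    # getmissing returns the subset of the given (name, node) keys whose blobs
    # are not usable in this store; zero-length blobs count as missing, and in
    # "strict" validation mode the blob contents are verified as well.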
    def getmissing(self, keys):
        missing = []
        with progress.bar(
            self.repo.ui, _("discovering"), _("files"), len(keys)
        ) as prog:
            for name, node in keys:
                prog.value += 1

                filepath = self._getfilepath(name, node)
                try:
                    size = os.path.getsize(filepath)
                    # An empty file is considered corrupt and we pretend it
                    # doesn't exist.
                    exists = size > 0
                except os.error:
                    exists = False

                if (
                    exists
                    and self._validatecache == "strict"
                    and not self._validatekey(filepath, "contains")
                ):
                    exists = False
                if not exists:
                    missing.append((name, node))

        return missing

    # BELOW THIS ARE IMPLEMENTATIONS OF REPACK SOURCE

    def markledger(self, ledger, options=None):
        if options and options.get(constants.OPTION_PACKSONLY):
            return

        with ledger.location(self._path):
            for filename, nodes in self._getfiles():
                for node in nodes:
                    ledger.markdataentry(self, filename, node)
                    ledger.markhistoryentry(self, filename, node)
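
    # cleanup removes the loose files that a repack has made redundant: entries
    # the ledger marked as garbage-collected, or as repacked into both a data
    # pack and a history pack.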
    def cleanup(self, ledger):
        entries = ledger.sources.get(self, [])
        with progress.bar(self.ui, _("cleaning up"), _("files"), len(entries)) as prog:
            for entry in entries:
                if entry.gced or (entry.datarepacked and entry.historyrepacked):
                    path = self._getfilepath(entry.filename, entry.node)
                    util.tryunlink(path)
                prog.value += 1

        # Clean up the repo cache directory.
        self._cleanupdirectory(self._getrepocachepath())

    def markforrefresh(self):
        # This only applies to stores that keep a snapshot of what's on disk.
        pass

    # BELOW THIS ARE NON-STANDARD APIS

    def _cleanupdirectory(self, rootdir):
        """Removes the empty directories and unnecessary files within the root
        directory recursively. Note that this method does not remove the root
        directory itself."""

        oldfiles = set()
        otherfiles = set()
        havefilename = False
        # osutil.listdir returns stat information which saves some rmdir/listdir
        # syscalls.
        for name, mode in util.osutil.listdir(rootdir):
            if stat.S_ISDIR(mode):
                dirpath = os.path.join(rootdir, name)
                self._cleanupdirectory(dirpath)

                # Now that the directory specified by dirpath is potentially
                # empty, try and remove it.
                try:
                    os.rmdir(dirpath)
                except OSError:
                    pass

            elif stat.S_ISREG(mode):
                if name == "filename":
                    havefilename = True
                elif name.endswith("_old"):
                    oldfiles.add(name[:-4])
                else:
                    otherfiles.add(name)

        # Remove the files which end with suffix '_old' and have no
        # corresponding file without the suffix '_old'. See addremotefilelognode
        # method for the generation/purpose of files with '_old' suffix.
        for filename in oldfiles - otherfiles:
            filepath = os.path.join(rootdir, filename + "_old")
            util.tryunlink(filepath)

        # If we've deleted all the files and have a "filename" left over, delete
        # the filename, too.
        if havefilename and not otherfiles:
            filepath = os.path.join(rootdir, "filename")
            util.tryunlink(filepath)

    def _getfiles(self):
        """Return an iterator of (filename, [node,...]) pairs for all the
        revisions that exist in the store.

        This is useful for obtaining a list of all the contents of the store
        when performing a repack to another store, since the store API requires
        name+node keys and not namehash+node keys.
        """
        existing = {}
        for filenamehash, node in self._listkeys():
            existing.setdefault(filenamehash, []).append(node)

        filenamemap = self._resolvefilenames(existing.keys())

        for filename, sha in filenamemap.iteritems():
            yield (filename, existing[sha])

    def _resolvefilenames(self, hashes):
        """Given a list of filename hashes that are present in the
        remotefilelog store, return a mapping from filename->hash.

        This is useful when converting remotefilelog blobs into other storage
        formats.
        """
        if not hashes:
            return {}

        filenames = {}
        missingfilename = set(hashes)

        if self._shared:
            getfilenamepath = lambda sha: os.path.join(
                self._path, self._reponame, sha[:2], sha[2:], "filename"
            )
        else:
            getfilenamepath = lambda sha: os.path.join(self._path, sha, "filename")

        # Check the in-memory cache and the on-disk "filename" files first, in
        # case some of these hashes have already been resolved.
        for sha in hashes:
            if sha in filenamehashcache:
                if filenamehashcache[sha] is not None:
                    filenames[filenamehashcache[sha]] = sha
                missingfilename.discard(sha)
            filenamepath = getfilenamepath(hex(sha))
            if os.path.exists(filenamepath):
                try:
                    filename = shallowutil.readfile(filenamepath)
                except Exception:
                    pass
                else:
                    checksha = hashlib.sha1(filename).digest()
                    if checksha == sha:
                        filenames[filename] = sha
                        filenamehashcache[sha] = filename
                        missingfilename.discard(sha)
                    else:
                        # The filename file is invalid - delete it.
                        util.tryunlink(filenamepath)

        if not missingfilename:
            return filenames

        # Scan all draft commits and the last 250000 commits in the changelog
        # looking for the files. If they're not there, we don't bother looking
        # further.
        # developer config: remotefilelog.resolvechangeloglimit
        unfi = self.repo.unfiltered()
        cl = unfi.changelog
        revs = list(unfi.revs("not public()"))
        scanlen = min(
            len(cl), self.ui.configint("remotefilelog", "resolvechangeloglimit", 250000)
        )
        remainingstr = "%d remaining" % len(missingfilename)
        with progress.bar(
            self.ui, "resolving filenames", total=len(revs) + scanlen
        ) as prog:
            for i, rev in enumerate(
                itertools.chain(revs, xrange(len(cl) - 1, len(cl) - scanlen, -1))
            ):
                files = cl.readfiles(cl.node(rev))
                prog.value = i, remainingstr
                for filename in files:
                    sha = hashlib.sha1(filename).digest()
                    if sha in missingfilename:
                        filenames[filename] = sha
                        filenamehashcache[sha] = filename
                        missingfilename.discard(sha)
                        remainingstr = "%d remaining" % len(missingfilename)
                if not missingfilename:
                    break

        # Record anything we didn't find in the cache so that we don't look
        # for it again.
        filenamehashcache.update((h, None) for h in missingfilename)

        return filenames

    def _getrepocachepath(self):
        return os.path.join(self._path, self._reponame) if self._shared else self._path

    def _listkeys(self):
        """List all the remotefilelog keys that exist in the store.

        Returns an iterator of (filename hash, filecontent hash) tuples.
        """

        for root, dirs, files in os.walk(self._getrepocachepath()):
            for filename in files:
                if len(filename) != 40:
                    continue
                node = filename
                if self._shared:
                    # In the shared cache the path looks like .../1a/85ffda..be21:
                    # the filename hash is split into a two-character directory
                    # and the remaining 38 characters, so reassemble it from the
                    # last two path components.
                    filenamehash = root[-41:-39] + root[-38:]
                else:
                    filenamehash = root[-40:]

                self._reportmetrics(root, filename)

                yield (bin(filenamehash), bin(node))

    def _reportmetrics(self, root, filename):
        """Log total remotefilelog blob size and count.

        The method is overridden in the remotefilelogstore class, because we
        can only count metrics for the datastore. History is kept in the same
        files so we don't need to log metrics twice.
        """
        pass
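
    # Layout note (inferred from the "filename" path helpers in
    # _resolvefilenames): shared-cache blobs live at
    # <cachepath>/<reponame>/<sha1(name)[:2]>/<sha1(name)[2:]>/<hex(node)>,
    # while local blobs live at <store path>/<sha1(name)>/<hex(node)>.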
    def _getfilepath(self, name, node):
        """
        The path of the file used to store the content of the named file
        with a particular node hash.
        """
        node = hex(node)
        if self._shared:
            key = shallowutil.getcachekey(self._reponame, name, node)
        else:
            key = shallowutil.getlocalkey(name, node)

        return os.path.join(self._path, key)

    def _getfilenamepath(self, name):
        """
        The path of the file used to store the name of the named file. This
        allows reverse lookup from the hashed name back to the original name.

        This is a file named ``filename`` inside the directory where the file
        content is stored.
        """
        if self._shared:
            key = shallowutil.getcachekey(self._reponame, name, "filename")
        else:
            key = shallowutil.getlocalkey(name, "filename")

        return os.path.join(self._path, key)

    def _getdata(self, name, node):
        filepath = self._getfilepath(name, node)
        filenamepath = self._getfilenamepath(name)
        try:
            data = shallowutil.readfile(filepath)
            if not os.path.exists(filenamepath):
                try:
                    shallowutil.writefile(filenamepath, name, readonly=True)
                except Exception:
                    pass
            if self._validatecache and not self._validatedata(data, filepath):
                if self._validatecachelog:
                    with util.posixfile(self._validatecachelog, "a+") as f:
                        f.write("corrupt %s during read\n" % filepath)
                os.rename(filepath, filepath + ".corrupt")
                raise KeyError("corrupt local cache file %s" % filepath)
        except IOError:
            raise KeyError(
                "no file found at %s for %s:%s" % (filepath, name, hex(node))
            )

        return data
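
    # addremotefilelognode writes a blob into the store: any existing blob for
    # the same key is first copied aside with an "_old" suffix, the original
    # filename is recorded next to it, and the write is optionally re-read and
    # validated.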
    def addremotefilelognode(self, name, node, data):
        filepath = self._getfilepath(name, node)
        filenamepath = self._getfilenamepath(name)

        oldumask = os.umask(0o002)
        try:
            # if this node already exists, save the old version for
            # recovery/debugging purposes.
            if os.path.exists(filepath):
                newfilename = filepath + "_old"
                # newfilename can be read-only and shutil.copy will fail.
                # Delete newfilename to avoid it
                if os.path.exists(newfilename):
                    shallowutil.unlinkfile(newfilename)
                shutil.copy(filepath, newfilename)

            shallowutil.mkstickygroupdir(self.ui, os.path.dirname(filepath))
            shallowutil.writefile(filepath, data, readonly=True)
            if not os.path.exists(filenamepath):
                shallowutil.writefile(filenamepath, name, readonly=True)

            if self._validatecache:
                if not self._validatekey(filepath, "write"):
                    raise error.Abort(
                        _("local cache write was corrupted %s") % filepath
                    )
        finally:
            os.umask(oldumask)
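
    # markrepo appends one line per repository to a shared "repos" file; each
    # line is the parent directory of the path that was passed in, and
    # duplicate lines are skipped.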
    def markrepo(self, path):
        """Call this to add the given repo path to the store's list of
        repositories that are using it. This is useful later when doing garbage
        collection, since it allows us to inspect the repos to see what nodes
        they want to be kept alive in the store.
        """
        repospath = os.path.join(self._path, "repos")
        line = os.path.dirname(path) + "\n"
        # Skip writing to the repos file if the line is already written.
        try:
            if line in util.iterfile(open(repospath, "rb")):
                return
        except IOError:
            pass

        with util.posixfile(repospath, "a") as reposfile:
            reposfile.write(line)

        repospathstat = os.stat(repospath)
        if repospathstat.st_uid == self._uid:
            os.chmod(repospath, 0o0664)

    def _validatekey(self, path, action):
        with util.posixfile(path, "rb") as f:
            data = f.read()

        if self._validatedata(data, path):
            return True

        if self._validatecachelog:
            with util.posixfile(self._validatecachelog, "a+") as f:
                f.write("corrupt %s during %s\n" % (path, action))

        os.rename(path, path + ".corrupt")
        return False
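
    # _validatedata checks a blob as parsed below: a size/flags header, then
    # "size" bytes of file content, then ancestor metadata whose first 20 bytes
    # are the file node, which must match the blob's own file name on disk.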
    def _validatedata(self, data, path):
        try:
            if len(data) > 0:
                # see remotefilelogserver.createfileblob for the format
                offset, size, flags = shallowutil.parsesizeflags(data)
                if len(data) <= size:
                    # it is truncated
                    return False

                # extract the node from the metadata
                offset += size
                datanode = data[offset : offset + 20]

                hexdatanode = hex(datanode)
                if self._validatehashes:
                    if not shallowutil.verifyfilenode(data, hexdatanode):
                        return False

                # and compare against the path
                if os.path.basename(path) == hexdatanode:
                    # Content matches the intended path
                    return True
                return False
        except (ValueError, RuntimeError):
            pass

        return False
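
    # gc makes two passes over the cache: first it deletes loose files that are
    # not in keepkeys and have not been accessed within a day, then it evicts
    # the least recently accessed survivors until the total size is under
    # remotefilelog.cachelimit.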
    def gc(self, keepkeys):
        ui = self.ui
        cachepath = self._path

        # prune cache
        import Queue

        queue = Queue.PriorityQueue()
        originalsize = 0
        size = 0
        count = 0
        removed = 0

        # keep files newer than a day even if they aren't needed
        limit = time.time() - (60 * 60 * 24)

        with progress.bar(ui, _("removing unnecessary files"), _("files")) as prog:
            for root, dirs, files in os.walk(cachepath):
                for file in files:
                    if file == "repos" or file == "filename":
                        continue

                    # Don't delete pack files
                    if "/packs/" in root:
                        continue

                    count += 1
                    prog.value = count
                    path = os.path.join(root, file)
                    key = os.path.relpath(path, cachepath)
                    try:
                        pathstat = os.stat(path)
                    except OSError as e:
                        # errno.ENOENT = no such file or directory
                        if e.errno != errno.ENOENT:
                            raise
                        msg = _("warning: file %s was removed by another " "process\n")
                        ui.warn(msg % path)
                        continue

                    originalsize += pathstat.st_size

                    if key in keepkeys or pathstat.st_atime > limit:
                        queue.put((pathstat.st_atime, path, pathstat))
                        size += pathstat.st_size
                    else:
                        try:
                            shallowutil.unlinkfile(path)
                        except OSError as e:
                            # errno.ENOENT = no such file or directory
                            if e.errno != errno.ENOENT:
                                raise
                            msg = _(
                                "warning: file %s was removed by another " "process\n"
                            )
                            ui.warn(msg % path)
                            continue
                        removed += 1

        # remove oldest files until under limit
        limit = ui.configbytes("remotefilelog", "cachelimit", "1000 GB")
        if size > limit:
            excess = size - limit
            with progress.bar(
                ui, _("enforcing cache limit"), _("bytes"), excess
            ) as prog:
                while queue and size > limit and size > 0:
                    atime, oldpath, oldpathstat = queue.get()
                    try:
                        shallowutil.unlinkfile(oldpath)
                    except OSError as e:
                        # errno.ENOENT = no such file or directory
                        if e.errno != errno.ENOENT:
                            raise
                        msg = _("warning: file %s was removed by another " "process\n")
                        ui.warn(msg % oldpath)
                    size -= oldpathstat.st_size
                    removed += 1
                    prog.value += oldpathstat.st_size

        ui.status(
            _("finished: removed %s of %s files (%0.2f GB to %0.2f GB)\n")
            % (
                removed,
                count,
                float(originalsize) / 1024.0 / 1024.0 / 1024.0,
                float(size) / 1024.0 / 1024.0 / 1024.0,
            )
        )

    def handlecorruption(self, name, node):
        filepath = self._getfilepath(name, node)
        if self._shared:
            self.ui.warn(_("detected corruption in '%s', moving it aside\n") % filepath)
            os.rename(filepath, filepath + ".corrupt")
            # Throw a KeyError so UnionStore can catch it and proceed to the
            # next store.
            raise KeyError(
                "corruption in file '%s' for %s:%s" % (filepath, name, hex(node))
            )
        else:
            # Throw a ValueError so UnionStore does not attempt to read further
            # stores, since local data corruption is not recoverable.
            raise ValueError(
                "corruption in file '%s' for %s:%s" % (filepath, name, hex(node))
            )