introduce fncache repository layout

* adds a new entry 'fncache' to '.hg/requires' for new repos
* writes new file '.hg/store/fncache'
* hash-encodes filenames with long paths (issue839)
* encodes Windows reserved filenames (issue793)
This commit is contained in:
Adrian Buehlmann 2008-10-19 19:12:07 +02:00
parent 3aa9ebf062
commit f3f0f9ecd6
7 changed files with 215 additions and 21 deletions

View File

@ -16,7 +16,7 @@ import merge as merge_
class localrepository(repo.repository): class localrepository(repo.repository):
capabilities = util.set(('lookup', 'changegroupsubset')) capabilities = util.set(('lookup', 'changegroupsubset'))
supported = ('revlogv1', 'store') supported = ('revlogv1', 'store', 'fncache')
def __init__(self, parentui, path=None, create=0): def __init__(self, parentui, path=None, create=0):
repo.repository.__init__(self) repo.repository.__init__(self)
@ -35,6 +35,7 @@ class localrepository(repo.repository):
if parentui.configbool('format', 'usestore', True): if parentui.configbool('format', 'usestore', True):
os.mkdir(os.path.join(self.path, "store")) os.mkdir(os.path.join(self.path, "store"))
requirements.append("store") requirements.append("store")
requirements.append("fncache")
# create an invalid changelog # create an invalid changelog
self.opener("00changelog.i", "a").write( self.opener("00changelog.i", "a").write(
'\0\0\0\2' # represents revlogv2 '\0\0\0\2' # represents revlogv2

View File

@ -5,8 +5,11 @@
# This software may be used and distributed according to the terms # This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference. # of the GNU General Public License, incorporated herein by reference.
from i18n import _
import os, stat, osutil, util import os, stat, osutil, util
_sha = util.sha1
def _buildencodefun(): def _buildencodefun():
e = '_' e = '_'
win_reserved = [ord(x) for x in '\\:*?"<>|'] win_reserved = [ord(x) for x in '\\:*?"<>|']
@ -35,6 +38,93 @@ def _buildencodefun():
encodefilename, decodefilename = _buildencodefun() encodefilename, decodefilename = _buildencodefun()
def _build_lower_encodefun():
    '''Build the lower-casing filename encoder.

    The returned function translates a string character by character:
    character codes 0-31 and 126-255 as well as backslash, colon,
    asterisk, question mark, double quote, angle brackets and pipe
    become '~xx' (two digit hex code), 'A'-'Z' become 'a'-'z', and
    every other character is kept unchanged.
    '''
    escaped = list(range(32)) + list(range(126, 256))
    escaped += [ord(c) for c in '\\:*?"<>|']

    table = {}
    # identity mapping first, special cases override it below
    for code in range(127):
        table[chr(code)] = chr(code)
    for code in escaped:
        table[chr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        table[chr(code)] = chr(code).lower()

    def encode(s):
        return "".join([table[c] for c in s])
    return encode

lowerencode = _build_lower_encodefun()
_windows_reserved_filenames = '''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()

def auxencode(path):
    '''Mask path components that are Windows reserved filenames.

    path is split on '/'. A component whose part before the first '.'
    is listed in _windows_reserved_filenames gets its third character
    replaced by '~xx', the two digit hex code of that character
    ('aux' -> 'au~78'). All other components, including empty ones,
    are kept as they are. Returns the components joined with '/' again.
    '''
    def mask(name):
        stem = name.split('.', 1)[0]
        if stem not in _windows_reserved_filenames:
            return name
        # every reserved name has at least three characters
        return "%s~%02x%s" % (name[:2], ord(name[2]), name[3:])

    return '/'.join([mask(component) for component in path.split('/')])
MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4

def hybridencode(path):
    '''encodes path with a length limit

    Paths that do not begin with 'data/' are returned unchanged. All
    other paths are encoded according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames
    are masked by encoding the third character ('aux' -> 'au~78', see
    auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE,
    a non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to DIR_PREFIX_LEN characters of all directory
    levels of the lowerencoded path, but not more levels than can fit
    into _MAX_SHORTENED_DIRS_LEN.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The
    filler is as long as possible, filling in characters from the
    basename until the encoded path has MAX_PATH_LEN_IN_HGSTORE
    characters (or all chars of the basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the
    hashed encoding was used.
    '''
    prefix = 'data/'
    if not path.startswith(prefix):
        return path

    tail = path[len(prefix):]
    default = prefix + auxencode(encodefilename(tail))
    if len(default) <= MAX_PATH_LEN_IN_HGSTORE:
        return default

    # too long: fall back to the hashed form
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components[-1]

    kept = []
    for component in components[:-1]:
        short = component[:DIR_PREFIX_LEN]
        # length of the already kept levels plus a separator plus the
        # candidate level (the separator is counted even for the first)
        if len('/'.join(kept)) + 1 + len(short) > _MAX_SHORTENED_DIRS_LEN:
            break
        kept.append(short)

    dirs = '/'.join(kept)
    if dirs:
        dirs += '/'

    head = 'dh/' + dirs
    space_left = MAX_PATH_LEN_IN_HGSTORE - len(head + digest + ext)
    filler = ''
    if space_left > 0:
        filler = basename[:space_left]
    return head + filler + digest + ext
def _calcmode(path): def _calcmode(path):
try: try:
# files in .hg/ will be created using this mode # files in .hg/ will be created using this mode
@ -120,8 +210,83 @@ class encodedstore(basicstore):
return (['requires', '00changelog.i'] + return (['requires', '00changelog.i'] +
[self.pathjoiner('store', f) for f in _data.split()]) [self.pathjoiner('store', f) for f in _data.split()])
def fncache(opener):
    '''yields the entries in the fncache file

    opener is called as opener('fncache', mode='rb') and must return an
    object that can be iterated line by line. If that call raises
    IOError the file is treated as nonexistent and nothing is yielded.

    Every line must consist of at least one character followed by a
    newline; each entry is yielded without the trailing newline.
    Raises util.Abort (naming the 1-based line number) on the first
    line that violates this. The file is closed both when all entries
    have been yielded and before aborting.
    '''
    try:
        fp = opener('fncache', mode='rb')
    except IOError:
        # skip nonexistent file
        return
    for n, line in enumerate(fp):
        if (len(line) < 2) or (line[-1] != '\n'):
            # the close() after the loop is not reached once we raise,
            # so release the handle here first
            fp.close()
            t = _('invalid entry in fncache, line %s') % (n + 1)
            raise util.Abort(t)
        yield line[:-1]
    fp.close()
class fncacheopener(object):
    '''Opener wrapper that records new 'data/' files in the fncache file.

    Calling an instance opens hybridencode(path) through the wrapped
    opener. Before that, the unencoded name of every 'data/' path that
    is opened in a mode other than 'r' / 'rb' is appended to the
    'fncache' file, unless it is already known.
    '''
    def __init__(self, opener):
        self.opener = opener
        # dict used as a set of known fncache entries; stays None until
        # the first non-read access to a 'data/' path loads the file
        self.entries = None

    def loadfncache(self):
        '''read the fncache file into self.entries'''
        self.entries = {}
        for f in fncache(self.opener):
            self.entries[f] = True

    def __call__(self, path, mode='r', *args, **kw):
        '''open path via the wrapped opener, registering it if needed

        path is the unencoded store path; mode and any further
        arguments are passed on to the wrapped opener unchanged.
        Returns whatever the wrapped opener returns.
        '''
        if mode not in ('r', 'rb') and path.startswith('data/'):
            if self.entries is None:
                self.loadfncache()
            if path not in self.entries:
                fp = self.opener('fncache', 'ab')
                try:
                    fp.write(path + '\n')
                finally:
                    # close explicitly so the entry reaches the file
                    # without depending on garbage collection
                    fp.close()
                # fncache may contain non-existent files after rollback / strip
                self.entries[path] = True
        return self.opener(hybridencode(path), mode, *args, **kw)
class fncachestore(basicstore):
    '''Store that encodes paths with hybridencode() and keeps the list
    of its data files in the 'fncache' file of the store directory.'''

    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        # plain opener on the store directory; used to read and rewrite
        # the fncache file itself
        self._op = opener(self.path)
        self._op.createmode = self.createmode
        # opener exposed to callers; it registers new files in fncache
        self.opener = fncacheopener(self._op)

    def join(self, f):
        '''return the path of f inside the store, hybridencoded'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yield (name, encoded name, size) for each fncache entry

        Entries whose file does not exist are skipped. If any were
        skipped, the fncache file is rewritten with the remaining
        entries once all of them have been yielded. Any other error
        from os.stat() is re-raised, so that e.g. a permission problem
        does not make an entry disappear from fncache.
        '''
        import errno
        import sys

        rewrite = False
        existing = []
        pjoin = self.pathjoiner
        spath = self.path
        for f in fncache(self._op):
            ef = hybridencode(f)
            try:
                st = os.stat(pjoin(spath, ef))
            except OSError:
                err = sys.exc_info()[1]
                if err.errno not in (errno.ENOENT, errno.ENOTDIR):
                    raise
                # nonexistent entry
                rewrite = True
                continue
            yield f, ef, st.st_size
            existing.append(f)
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            fp = self._op('fncache', mode='wb')
            try:
                for p in existing:
                    fp.write(p + '\n')
            finally:
                fp.close()

    def copylist(self):
        '''return the paths to copy: the entries of _data plus 'dh' and
        'fncache' (each joined onto 'store'), 'requires' and
        '00changelog.i' '''
        d = _data + ' dh fncache'
        return (['requires', '00changelog.i'] +
                [self.pathjoiner('store', f) for f in d.split()])
def store(requirements, path, opener, pathjoiner=None):
    '''Return the store object matching the repository requirements.

    'store' together with 'fncache' in requirements selects
    fncachestore, 'store' alone selects encodedstore, and without
    'store' a basicstore is returned. pathjoiner falls back to
    os.path.join when it is not given.
    '''
    joiner = pathjoiner or os.path.join
    if 'store' not in requirements:
        storetype = basicstore
    elif 'fncache' in requirements:
        storetype = fncachestore
    else:
        storetype = encodedstore
    return storetype(path, opener, joiner)

View File

@ -2,6 +2,7 @@
CONTRIBDIR=$TESTDIR/../contrib CONTRIBDIR=$TESTDIR/../contrib
echo % prepare repo-a
mkdir repo-a mkdir repo-a
cd repo-a cd repo-a
hg init hg init
@ -18,11 +19,13 @@ hg commit -m third -d '0 0'
hg verify hg verify
echo dumping revlog of file a to stdout: echo
echo % dumping revlog of file a to stdout
python $CONTRIBDIR/dumprevlog .hg/store/data/a.i python $CONTRIBDIR/dumprevlog .hg/store/data/a.i
echo dumprevlog done echo % dumprevlog done
# dump all revlogs to file repo.dump echo
echo % dump all revlogs to file repo.dump
find .hg/store -name "*.i" | sort | xargs python $CONTRIBDIR/dumprevlog > ../repo.dump find .hg/store -name "*.i" | sort | xargs python $CONTRIBDIR/dumprevlog > ../repo.dump
cd .. cd ..
@ -31,17 +34,28 @@ mkdir repo-b
cd repo-b cd repo-b
hg init hg init
echo undumping: echo
echo % undumping into repo-b
python $CONTRIBDIR/undumprevlog < ../repo.dump python $CONTRIBDIR/undumprevlog < ../repo.dump
echo undumping done echo % undumping done
cd ..
echo
echo % clone --pull repo-b repo-c to rebuild fncache
hg clone --pull -U repo-b repo-c
cd repo-c
echo
echo % verify repo-c
hg verify hg verify
cd .. cd ..
echo comparing repos: echo
hg -R repo-b incoming repo-a echo % comparing repos
hg -R repo-a incoming repo-b hg -R repo-c incoming repo-a
echo comparing done hg -R repo-a incoming repo-c
exit 0 exit 0

View File

@ -1,9 +1,11 @@
% prepare repo-a
checking changesets checking changesets
checking manifests checking manifests
crosschecking files in changesets and manifests crosschecking files in changesets and manifests
checking files checking files
1 files, 3 changesets, 3 total revisions 1 files, 3 changesets, 3 total revisions
dumping revlog of file a to stdout:
% dumping revlog of file a to stdout
file: .hg/store/data/a.i file: .hg/store/data/a.i
node: 183d2312b35066fb6b3b449b84efc370d50993d0 node: 183d2312b35066fb6b3b449b84efc370d50993d0
linkrev: 0 linkrev: 0
@ -32,22 +34,34 @@ adding to file a
adding more to file a adding more to file a
-end- -end-
dumprevlog done % dumprevlog done
undumping:
% dump all revlogs to file repo.dump
% undumping into repo-b
.hg/store/00changelog.i .hg/store/00changelog.i
.hg/store/00manifest.i .hg/store/00manifest.i
.hg/store/data/a.i .hg/store/data/a.i
undumping done % undumping done
% clone --pull repo-b repo-c to rebuild fncache
requesting all changes
adding changesets
adding manifests
adding file changes
added 3 changesets with 3 changes to 1 files
% verify repo-c
checking changesets checking changesets
checking manifests checking manifests
crosschecking files in changesets and manifests crosschecking files in changesets and manifests
checking files checking files
1 files, 3 changesets, 3 total revisions 1 files, 3 changesets, 3 total revisions
comparing repos:
% comparing repos
comparing with repo-a comparing with repo-a
searching for changes searching for changes
no changes found no changes found
comparing with repo-b comparing with repo-c
searching for changes searching for changes
no changes found no changes found
comparing done

View File

@ -22,6 +22,7 @@
00770 ./.hg/store/data/dir/ 00770 ./.hg/store/data/dir/
00660 ./.hg/store/data/dir/bar.i 00660 ./.hg/store/data/dir/bar.i
00660 ./.hg/store/data/foo.i 00660 ./.hg/store/data/foo.i
00660 ./.hg/store/fncache
00660 ./.hg/store/undo 00660 ./.hg/store/undo
00660 ./.hg/undo.branch 00660 ./.hg/undo.branch
00660 ./.hg/undo.dirstate 00660 ./.hg/undo.dirstate
@ -49,6 +50,7 @@
00770 ../push/.hg/store/data/dir/ 00770 ../push/.hg/store/data/dir/
00660 ../push/.hg/store/data/dir/bar.i 00660 ../push/.hg/store/data/dir/bar.i
00660 ../push/.hg/store/data/foo.i 00660 ../push/.hg/store/data/foo.i
00660 ../push/.hg/store/fncache
00660 ../push/.hg/store/undo 00660 ../push/.hg/store/undo
00660 ../push/.hg/undo.branch 00660 ../push/.hg/undo.branch
00660 ../push/.hg/undo.dirstate 00660 ../push/.hg/undo.dirstate

View File

@ -3,6 +3,7 @@ store created
00changelog.i created 00changelog.i created
revlogv1 revlogv1
store store
fncache
adding foo adding foo
# creating repo with old format # creating repo with old format
revlogv1 revlogv1

View File

@ -17,7 +17,6 @@ checking changesets
checking manifests checking manifests
crosschecking files in changesets and manifests crosschecking files in changesets and manifests
checking files checking files
?: cannot decode filename 'data/X_f_o_o.txt.i'
data/FOO.txt.i@0: missing revlog! data/FOO.txt.i@0: missing revlog!
0: empty or missing FOO.txt 0: empty or missing FOO.txt
FOO.txt@0: f62022d3d590 in manifests not found FOO.txt@0: f62022d3d590 in manifests not found
@ -27,8 +26,6 @@ checking files
data/bar.txt.i@0: missing revlog! data/bar.txt.i@0: missing revlog!
0: empty or missing bar.txt 0: empty or missing bar.txt
bar.txt@0: 256559129457 in manifests not found bar.txt@0: 256559129457 in manifests not found
warning: orphan revlog 'data/xbar.txt.i'
3 files, 1 changesets, 0 total revisions 3 files, 1 changesets, 0 total revisions
1 warnings encountered! 9 integrity errors encountered!
10 integrity errors encountered!
(first damaged changeset appears to be 0) (first damaged changeset appears to be 0)