sapling/mercurial/verify.py

299 lines
10 KiB
Python
Raw Normal View History

2006-08-08 01:27:09 +04:00
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
2006-08-08 01:27:09 +04:00
#
# This software may be used and distributed according to the terms of the
2010-01-20 07:20:08 +03:00
# GNU General Public License version 2 or any later version.
2006-08-08 01:27:09 +04:00
from node import nullid, short
2006-12-15 05:25:19 +03:00
from i18n import _
2009-11-02 12:15:04 +03:00
import os
import revlog, util, error
2006-08-08 01:27:09 +04:00
def verify(repo):
    """Check the integrity of repo while holding the repository lock.

    The lock obtained from repo.lock() is always released, whether
    _verify() returns or raises. The value returned by _verify() is
    passed through unchanged.
    """
    held = repo.lock()
    try:
        result = _verify(repo)
    finally:
        held.release()
    return result
def _verify(repo):
mflinkrevs = {}
2006-08-08 01:27:09 +04:00
filelinkrevs = {}
filenodes = {}
revisions = 0
2009-05-17 06:14:15 +04:00
badrevs = set()
2006-08-08 01:27:09 +04:00
errors = [0]
warnings = [0]
2008-06-26 23:35:50 +04:00
ui = repo.ui
cl = repo.changelog
mf = repo.manifest
lrugetctx = util.lrucachefunc(repo.changectx)
2006-08-08 01:27:09 +04:00
if not repo.cancopy():
raise util.Abort(_("cannot verify bundle or remote repos"))
def err(linkrev, msg, filename=None):
if linkrev != None:
2009-05-17 06:14:15 +04:00
badrevs.add(linkrev)
else:
linkrev = '?'
msg = "%s: %s" % (linkrev, msg)
if filename:
msg = "%s@%s" % (filename, msg)
2008-06-26 23:35:50 +04:00
ui.warn(" " + msg + "\n")
2006-08-08 01:27:09 +04:00
errors[0] += 1
def exc(linkrev, msg, inst, filename=None):
if isinstance(inst, KeyboardInterrupt):
ui.warn(_("interrupted"))
raise
err(linkrev, "%s: %s" % (msg, inst), filename)
2006-08-08 01:27:09 +04:00
def warn(msg):
2008-06-26 23:35:50 +04:00
ui.warn(msg + "\n")
2006-08-08 01:27:09 +04:00
warnings[0] += 1
def checklog(obj, name, linkrev):
if not len(obj) and (havecl or havemf):
err(linkrev, _("empty or missing %s") % name)
return
2006-08-08 01:27:09 +04:00
d = obj.checksize()
if d[0]:
err(None, _("data length off by %d bytes") % d[0], name)
2006-08-08 01:27:09 +04:00
if d[1]:
err(None, _("index contains %d extra bytes") % d[1], name)
2006-08-08 01:27:09 +04:00
if obj.version != revlog.REVLOGV0:
if not revlogv1:
warn(_("warning: `%s' uses revlog format 1") % name)
elif revlogv1:
warn(_("warning: `%s' uses revlog format 0") % name)
def checkentry(obj, i, node, seen, linkrevs, f):
lr = obj.linkrev(obj.rev(node))
if lr < 0 or (havecl and lr not in linkrevs):
if lr < 0 or lr >= len(cl):
2009-03-30 02:56:53 +04:00
msg = _("rev %d points to nonexistent changeset %d")
else:
msg = _("rev %d points to unexpected changeset %d")
err(None, msg % (i, lr), f)
if linkrevs:
if f and len(linkrevs) > 1:
try:
# attempt to filter down to real linkrevs
linkrevs = [l for l in linkrevs
if lrugetctx(l)[f].filenode() == node]
except:
pass
warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
lr = None # can't be trusted
try:
p1, p2 = obj.parents(node)
if p1 not in seen and p1 != nullid:
err(lr, _("unknown parent 1 %s of %s") %
(short(p1), short(n)), f)
if p2 not in seen and p2 != nullid:
err(lr, _("unknown parent 2 %s of %s") %
(short(p2), short(p1)), f)
except Exception, inst:
exc(lr, _("checking parents of %s") % short(node), inst, f)
if node in seen:
err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f)
seen[n] = i
return lr
2009-11-02 12:15:04 +03:00
if os.path.exists(repo.sjoin("journal")):
ui.warn(_("abandoned transaction found - run hg recover\n"))
2008-06-26 23:35:50 +04:00
revlogv1 = cl.version != revlog.REVLOGV0
if ui.verbose or not revlogv1:
ui.status(_("repository uses revlog format %d\n") %
2006-08-08 01:27:09 +04:00
(revlogv1 and 1 or 0))
havecl = len(cl) > 0
havemf = len(mf) > 0
2006-08-08 01:27:09 +04:00
ui.status(_("checking changesets\n"))
seen = {}
checklog(cl, "changelog", 0)
2010-02-11 03:46:19 +03:00
total = len(repo)
for i in repo:
ui.progress(_('checking'), i, total=total)
2008-06-26 23:35:50 +04:00
n = cl.node(i)
checkentry(cl, i, n, seen, [i], "changelog")
2006-08-08 01:27:09 +04:00
try:
2008-06-26 23:35:50 +04:00
changes = cl.read(n)
mflinkrevs.setdefault(changes[0], []).append(i)
for f in changes[3]:
filelinkrevs.setdefault(f, []).append(i)
2006-08-08 01:27:09 +04:00
except Exception, inst:
exc(i, _("unpacking changeset %s") % short(n), inst)
ui.progress(_('checking'), None)
2006-08-08 01:27:09 +04:00
2008-06-26 23:35:50 +04:00
ui.status(_("checking manifests\n"))
seen = {}
checklog(mf, "manifest", 0)
2010-02-11 03:46:19 +03:00
total = len(mf)
2008-06-26 23:35:50 +04:00
for i in mf:
ui.progress(_('checking'), i, total=total)
2008-06-26 23:35:50 +04:00
n = mf.node(i)
lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
if n in mflinkrevs:
del mflinkrevs[n]
else:
err(lr, _("%s not in changesets") % short(n), "manifest")
2006-08-08 01:27:09 +04:00
try:
2008-06-26 23:35:50 +04:00
for f, fn in mf.readdelta(n).iteritems():
if not f:
err(lr, _("file without name in manifest"))
elif f != "/dev/null":
filenodes.setdefault(f, {}).setdefault(fn, lr)
2006-08-08 01:27:09 +04:00
except Exception, inst:
exc(lr, _("reading manifest delta %s") % short(n), inst)
ui.progress(_('checking'), None)
2006-08-08 01:27:09 +04:00
2008-06-26 23:35:50 +04:00
ui.status(_("crosschecking files in changesets and manifests\n"))
2006-08-08 01:27:09 +04:00
2010-02-11 03:46:19 +03:00
total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
count = 0
if havemf:
2010-01-25 09:05:27 +03:00
for c, m in sorted([(c, m) for m in mflinkrevs
for c in mflinkrevs[m]]):
2010-02-11 03:46:19 +03:00
count += 1
ui.progress(_('crosschecking'), count, total=total)
err(c, _("changeset refers to unknown manifest %s") % short(m))
mflinkrevs = None # del is bad here due to scope issues
for f in sorted(filelinkrevs):
2010-02-11 03:46:19 +03:00
count += 1
ui.progress(_('crosschecking'), count, total=total)
if f not in filenodes:
lr = filelinkrevs[f][0]
err(lr, _("in changeset but not in manifest"), f)
2006-08-08 01:27:09 +04:00
if havecl:
for f in sorted(filenodes):
2010-02-11 03:46:19 +03:00
count += 1
ui.progress(_('crosschecking'), count, total=total)
if f not in filelinkrevs:
try:
fl = repo.file(f)
lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
except:
lr = None
err(lr, _("in manifest but not in changeset"), f)
ui.progress(_('crosschecking'), None)
2010-02-11 03:46:19 +03:00
2008-06-26 23:35:50 +04:00
ui.status(_("checking files\n"))
2008-08-14 05:18:41 +04:00
2009-05-17 06:14:15 +04:00
storefiles = set()
for f, f2, size in repo.store.datafiles():
if not f:
err(None, _("cannot decode filename '%s'") % f2)
elif size > 0:
2009-05-17 06:14:15 +04:00
storefiles.add(f)
2008-08-14 05:18:41 +04:00
files = sorted(set(filenodes) | set(filelinkrevs))
2010-02-11 03:46:19 +03:00
total = len(files)
for i, f in enumerate(files):
ui.progress(_('checking'), i, item=f, total=total)
try:
linkrevs = filelinkrevs[f]
except KeyError:
# in manifest but not in changelog
linkrevs = []
if linkrevs:
lr = linkrevs[0]
else:
lr = None
try:
fl = repo.file(f)
except error.RevlogError, e:
err(lr, _("broken revlog! (%s)") % e, f)
continue
2008-08-14 05:18:41 +04:00
for ff in fl.files():
2008-08-14 05:18:41 +04:00
try:
2009-05-17 06:14:15 +04:00
storefiles.remove(ff)
2008-08-14 05:18:41 +04:00
except KeyError:
err(lr, _("missing revlog!"), ff)
2008-08-14 05:18:41 +04:00
checklog(fl, f, lr)
2006-08-08 01:27:09 +04:00
seen = {}
for i in fl:
2006-08-08 01:27:09 +04:00
revisions += 1
n = fl.node(i)
lr = checkentry(fl, i, n, seen, linkrevs, f)
if f in filenodes:
if havemf and n not in filenodes[f]:
err(lr, _("%s not in manifests") % (short(n)), f)
else:
del filenodes[f][n]
2006-08-08 01:27:09 +04:00
# verify contents
try:
t = fl.read(n)
rp = fl.renamed(n)
if len(t) != fl.size(i):
if len(fl.revision(n)) != fl.size(i):
err(lr, _("unpacked size is %s, %s expected") %
(len(t), fl.size(i)), f)
except Exception, inst:
exc(lr, _("unpacking %s") % short(n), inst, f)
2006-08-08 01:27:09 +04:00
2006-12-01 11:35:46 +03:00
# check renames
try:
if rp:
if lr is not None and ui.verbose:
ctx = lrugetctx(lr)
found = False
for pctx in ctx.parents():
if rp[0] in pctx:
found = True
break
if not found:
warn(_("warning: copy source of '%s' not"
" in parents of %s") % (f, ctx))
2006-12-01 11:35:46 +03:00
fl2 = repo.file(rp[0])
if not len(fl2):
err(lr, _("empty or missing copy source revlog %s:%s")
% (rp[0], short(rp[1])), f)
elif rp[1] == nullid:
ui.note(_("warning: %s@%s: copy source"
" revision is nullid %s:%s\n")
% (f, lr, rp[0], short(rp[1])))
else:
fl2.rev(rp[1])
2006-12-01 11:35:46 +03:00
except Exception, inst:
exc(lr, _("checking rename of %s") % short(n), inst, f)
2006-12-01 11:35:46 +03:00
2006-08-08 01:27:09 +04:00
# cross-check
if f in filenodes:
2010-01-25 09:05:27 +03:00
fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
for lr, node in sorted(fns):
err(lr, _("%s in manifests not found") % short(node), f)
ui.progress(_('checking'), None)
2006-08-08 01:27:09 +04:00
2008-08-14 05:18:41 +04:00
for f in storefiles:
warn(_("warning: orphan revlog '%s'") % f)
2008-06-26 23:35:50 +04:00
ui.status(_("%d files, %d changesets, %d total revisions\n") %
(len(files), len(cl), revisions))
2006-08-08 01:27:09 +04:00
if warnings[0]:
2008-06-26 23:35:50 +04:00
ui.warn(_("%d warnings encountered!\n") % warnings[0])
2006-08-08 01:27:09 +04:00
if errors[0]:
2008-06-26 23:35:50 +04:00
ui.warn(_("%d integrity errors encountered!\n") % errors[0])
if badrevs:
2008-06-26 23:35:50 +04:00
ui.warn(_("(first damaged changeset appears to be %d)\n")
% min(badrevs))
2006-08-08 01:27:09 +04:00
return 1