2006-08-08 01:27:09 +04:00
|
|
|
# verify.py - repository integrity checking for Mercurial
|
|
|
|
#
|
2007-06-19 10:51:34 +04:00
|
|
|
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
|
2006-08-08 01:27:09 +04:00
|
|
|
#
|
2009-04-26 03:08:54 +04:00
|
|
|
# This software may be used and distributed according to the terms of the
|
2010-01-20 07:20:08 +03:00
|
|
|
# GNU General Public License version 2 or any later version.
|
2006-08-08 01:27:09 +04:00
|
|
|
|
2008-03-07 00:23:26 +03:00
|
|
|
from node import nullid, short
|
2006-12-15 05:25:19 +03:00
|
|
|
from i18n import _
|
2009-11-02 12:15:04 +03:00
|
|
|
import os
|
2009-03-07 22:04:56 +03:00
|
|
|
import revlog, util, error
|
2006-08-08 01:27:09 +04:00
|
|
|
|
|
|
|
def verify(repo):
    """Run _verify() on repo while holding the repository lock.

    The lock is taken before the check starts and is released again
    whether _verify() returns or raises.  The value returned by
    _verify() is passed through unchanged.
    """
    repolock = repo.lock()
    try:
        result = _verify(repo)
    finally:
        repolock.release()
    return result
|
2007-07-22 01:02:10 +04:00
|
|
|
|
|
|
|
def _verify(repo):
    """Check the changelog, manifest and filelogs of repo for consistency.

    The checks run in this order: changesets, manifests, a crosscheck of
    the files named by changesets against the files named by manifests,
    and finally every filelog.  Problems are reported through repo.ui as
    they are found, followed by a summary.

    Returns 1 if at least one integrity error was reported, otherwise
    None.  Raises util.Abort when repo.cancopy() is false.  verify()
    calls this function with the repository lock held.
    """
    # manifest node -> changeset revs whose changelog entry names it
    mflinkrevs = {}
    # file name -> changeset revs whose changelog entry lists the file
    filelinkrevs = {}
    # file name -> {file node: linkrev returned for the manifest revision
    # whose delta first mentioned that node}
    filenodes = {}
    revisions = 0
    badrevs = set()
    # one-element lists so that the nested helpers can update the counters
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.cancopy():
        raise util.Abort(_("cannot verify bundle or remote repos"))

    def err(linkrev, msg, filename=None):
        """Report an integrity error and count it.

        linkrev is the changeset revision blamed for the problem.  When
        it is None the message shows '?' and nothing is added to badrevs.
        """
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

    def exc(linkrev, msg, inst, filename=None):
        """Report exception inst, raised while doing msg, as an error.

        Has to be called from the except block that caught inst, because
        the KeyboardInterrupt branch re-raises with a bare raise.
        """
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            raise
        if not str(inst):
            # fall back to repr() for exceptions without a message
            inst = repr(inst)
        err(linkrev, "%s: %s" % (msg, inst), filename)

    def warn(msg):
        """Print a warning line and count it."""
        ui.warn(msg + "\n")
        warnings[0] += 1

    def checklog(obj, name, linkrev):
        """Run the per-revlog checks on obj, called name in messages.

        Reports an empty revlog (only when a changelog or manifest
        exists), non-zero values from obj.checksize(), and a revlog
        format that differs from the one used by the changelog.
        """
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(obj, i, node, seen, linkrevs, f):
        """Check revision i (with id node) of revlog obj.

        seen maps the nodes checked so far to their revision number and
        is updated with node.  linkrevs lists the changeset revisions
        the entry is allowed to link to, and f is the name used in
        messages.

        Returns the linkrev of the entry, or None when that linkrev is
        negative or (if a changelog exists) not in linkrevs.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            # use the node argument here, not the caller's loop variable
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception as inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    if os.path.exists(repo.sjoin("journal")):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    # the helpers above read revlogv1, havecl and havemf from this scope,
    # so these must be bound before the first checklog()/checkentry() call
    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                  int(revlogv1))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    ui.status(_("checking changesets\n"))
    seen = {}
    checklog(cl, "changelog", 0)
    total = len(repo)
    for i in repo:
        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

        try:
            changes = cl.read(n)
            # changes[0] is used as the manifest key and changes[3] as
            # the list of file names for the crosscheck below
            mflinkrevs.setdefault(changes[0], []).append(i)
            for f in changes[3]:
                filelinkrevs.setdefault(f, []).append(i)
        except Exception as inst:
            exc(i, _("unpacking changeset %s") % short(n), inst)
    ui.progress(_('checking'), None)

    ui.status(_("checking manifests\n"))
    seen = {}
    checklog(mf, "manifest", 0)
    total = len(mf)
    for i in mf:
        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            # whatever is left in mflinkrevs afterwards was never found
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null":
                    filenodes.setdefault(f, {}).setdefault(fn, lr)
        except Exception as inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)
    ui.progress(_('checking'), None)

    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
    count = 0
    if havemf:
        for c, m in sorted([(c, m) for m in mflinkrevs
                            for c in mflinkrevs[m]]):
            count += 1
            if m == nullid:
                continue
            ui.progress(_('crosschecking'), count, total=total)
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    # no usable linkrev; err() will print '?' instead
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.progress(_('crosschecking'), None)

    ui.status(_("checking files\n"))

    # store entries not yet claimed by a filelog; anything still in the
    # set after the loop below is reported as an orphan
    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0 or not revlogv1:
            storefiles.add(f)

    files = sorted(set(filenodes) | set(filelinkrevs))
    total = len(files)
    for i, f in enumerate(files):
        ui.progress(_('checking'), i, item=f, total=total)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.RevlogError as e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                err(lr, _("missing revlog!"), ff)

        checklog(fl, f, lr)
        seen = {}
        rp = None
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            # verify contents
            try:
                l = len(fl.read(n))
                rp = fl.renamed(n)
                if l != fl.size(i):
                    # compare against the full revision text before
                    # reporting a size mismatch
                    if len(fl.revision(n)) != fl.size(i):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (l, fl.size(i)), f)
            except Exception as inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        found = False
                        for pctx in ctx.parents():
                            if rp[0] in pctx:
                                found = True
                                break
                        if not found:
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                    else:
                        # called only for the exception it raises on a
                        # failed lookup; the result is not used
                        fl2.rev(rp[1])
            except Exception as inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check
        if f in filenodes:
            # nodes still listed here were named by a manifest but were
            # not removed while walking the filelog above
            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                err(lr, _("%s in manifests not found") % short(node), f)
    ui.progress(_('checking'), None)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    ui.status(_("%d files, %d changesets, %d total revisions\n") %
              (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1
|