37ed176ad7
An invalid entry is any entry with a base not in the pack, or whose
deltabases form a cycle. If there are any entries like that, the output
will look like this:

```
(Root):
Node          Delta Base    Delta Length  Blob Size
665a7e7913af  e66038a2894e  61            2142
52bd634be310  000000000000  2142          2142
8b5847087ce0  000000000000  2142          2142
960f5acb3e99  edf2ffd7daab  162           2142
b7d7e5aa692e  8b5847087ce0  162           2142
cdcc4d74d667  960f5acb3e99  324           2142

Total:                      14652         48920 (70.0% smaller)

Bad entry: 960f5acb3e99 has an unknown deltabase (edf2ffd7daab)
Bad entry: b7d7e5aa692e has an unknown deltabase (edf2ffd7daab)
2 invalid entries
```

Differential Revision: https://phab.mercurial-scm.org/D1271
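For intuition, here is a minimal, self-contained sketch of the two checks described above. The `findbadentries` helper and the toy 20-byte node values are hypothetical; the real implementation is `_sanitycheck` in the file below.

```python
NULLID = b'\0' * 20  # stand-in for the null node id

def findbadentries(bases):
    """Return (node, reason) pairs for entries whose delta chains leave
    the pack or loop back on themselves."""
    bad = []
    for node in bases:
        seen = set()
        current = node
        while bases.get(current, NULLID) != NULLID:
            base = bases[current]
            if base not in bases:
                # the chain points at a node that is not in the pack
                bad.append((node, 'unknown deltabase'))
                break
            if base in seen:
                # the chain revisits a node, so it can never terminate
                bad.append((node, 'cycle'))
                break
            seen.add(base)
            current = base
    return bad

# A two-entry pack where the second entry's base is missing from the pack:
print(findbadentries({b'a' * 20: NULLID, b'b' * 20: b'c' * 20}))
```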
# debugcommands.py - debug logic for remotefilelog
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import

import hashlib
import os

from mercurial import error, filelog, revlog
from mercurial.node import bin, hex, nullid, short
from mercurial.i18n import _

from . import (
    constants,
    datapack,
    fileserverclient,
    historypack,
    shallowrepo,
    shallowutil,
)
from .lz4wrapper import lz4decompress

def debugremotefilelog(ui, path, **opts):
    """dump the contents of a remotefilelog blob: its size, key and
    ancestor mapping"""
    decompress = opts.get('decompress')

    size, firstnode, mapping = parsefileblob(path, decompress)

    ui.status(_("size: %s bytes\n") % (size))
    ui.status(_("path: %s \n") % (path))
    ui.status(_("key: %s \n") % (short(firstnode)))
    ui.status(_("\n"))
    ui.status(_("%12s => %12s %13s %13s %12s\n") %
              ("node", "p1", "p2", "linknode", "copyfrom"))

    queue = [firstnode]
    while queue:
        node = queue.pop(0)
        p1, p2, linknode, copyfrom = mapping[node]
        ui.status(_("%s => %s %s %s %s\n") %
                  (short(node), short(p1), short(p2), short(linknode), copyfrom))
        if p1 != nullid:
            queue.append(p1)
        if p2 != nullid:
            queue.append(p2)

def buildtemprevlog(repo, file):
    """build a temporary revlog from the remotefilelog data for ``file`` so
    that the standard revlog-based debug commands can inspect it"""
    # get filename key
    filekey = hashlib.sha1(file).hexdigest()
    filedir = os.path.join(repo.path, 'store/data', filekey)

    # sort all entries based on linkrev
    fctxs = []
    for filenode in os.listdir(filedir):
        if '_old' not in filenode:
            fctxs.append(repo.filectx(file, fileid=bin(filenode)))

    fctxs = sorted(fctxs, key=lambda x: x.linkrev())

    # add to revlog
    temppath = repo.sjoin('data/temprevlog.i')
    if os.path.exists(temppath):
        os.remove(temppath)
    r = filelog.filelog(repo.svfs, 'temprevlog')

    # a stub transaction whose add() is a no-op; that is all filelog.add()
    # needs from it here
    class faket(object):
        def add(self, a, b, c):
            pass
    t = faket()
    for fctx in fctxs:
        if fctx.node() not in repo:
            continue

        p = fctx.filelog().parents(fctx.filenode())
        meta = {}
        if fctx.renamed():
            meta['copy'] = fctx.renamed()[0]
            meta['copyrev'] = hex(fctx.renamed()[1])

        r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])

    return r

def debugindex(orig, ui, repo, file_=None, **opts):
    """dump the contents of an index file"""
    if (opts.get('changelog') or
        opts.get('manifest') or
        opts.get('dir') or
        shallowrepo.requirement not in repo.requirements or
        not repo.shallowmatch(file_)):
        return orig(ui, repo, file_, **opts)

    r = buildtemprevlog(repo, file_)

    # debugindex like normal
    format = opts.get('format', 0)
    if format not in (0, 1):
        raise error.Abort(_("unknown format %d") % format)

    generaldelta = r.version & revlog.FLAG_GENERALDELTA
    if generaldelta:
        basehdr = ' delta'
    else:
        basehdr = '  base'

    if format == 0:
        ui.write(("   rev    offset  length " + basehdr + " linkrev"
                  " nodeid       p1           p2\n"))
    elif format == 1:
        ui.write(("   rev flag   offset   length"
                  "     size " + basehdr + "   link     p1     p2"
                  "       nodeid\n"))

    for i in r:
        node = r.node(i)
        if generaldelta:
            base = r.deltaparent(i)
        else:
            base = r.chainbase(i)
        if format == 0:
            try:
                pp = r.parents(node)
            except Exception:
                pp = [nullid, nullid]
            ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
                i, r.start(i), r.length(i), base, r.linkrev(i),
                short(node), short(pp[0]), short(pp[1])))
        elif format == 1:
            pr = r.parentrevs(i)
            ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
                i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
                base, r.linkrev(i), pr[0], pr[1], short(node)))

def debugindexdot(orig, ui, repo, file_):
    """dump an index DAG as a graphviz dot file"""
    if shallowrepo.requirement not in repo.requirements:
        return orig(ui, repo, file_)

    r = buildtemprevlog(repo, os.path.basename(file_)[:-2])

    ui.write(("digraph G {\n"))
    for i in r:
        node = r.node(i)
        pp = r.parents(node)
        ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
        if pp[1] != nullid:
            ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
    ui.write("}\n")

def verifyremotefilelog(ui, path, **opts):
    """walk a cache of remotefilelog blobs and report entries whose
    ancestor mapping contains a null linknode"""
    decompress = opts.get('decompress')

    for root, dirs, files in os.walk(path):
        for file in files:
            if file == "repos":
                continue
            filepath = os.path.join(root, file)
            size, firstnode, mapping = parsefileblob(filepath, decompress)
            for p1, p2, linknode, copyfrom in mapping.itervalues():
                if linknode == nullid:
                    actualpath = os.path.relpath(root, path)
                    key = fileserverclient.getcachekey("reponame", actualpath,
                                                       file)
                    ui.status("%s %s\n" % (key, os.path.relpath(filepath,
                                                                path)))

def parsefileblob(path, decompress):
    """parse a remotefilelog blob file into (size, firstnode, mapping),
    where mapping is node -> (p1, p2, linknode, copyfrom)"""
    raw = None
    f = open(path, "rb")
    try:
        raw = f.read()
    finally:
        f.close()

    if decompress:
        raw = lz4decompress(raw)

    offset, size, flags = shallowutil.parsesizeflags(raw)
    start = offset + size

    firstnode = None

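    # After the blob content comes a sequence of history records, each one:
    #   node (20 bytes) | p1 (20) | p2 (20) | linknode (20) | copyfrom path
    # with the variable-length copyfrom path terminated by a '\0' byte.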
    mapping = {}
    while start < len(raw):
        divider = raw.index('\0', start + 80)

        currentnode = raw[start:(start + 20)]
        if not firstnode:
            firstnode = currentnode

        p1 = raw[(start + 20):(start + 40)]
        p2 = raw[(start + 40):(start + 60)]
        linknode = raw[(start + 60):(start + 80)]
        copyfrom = raw[(start + 80):divider]

        mapping[currentnode] = (p1, p2, linknode, copyfrom)
        start = divider + 1

    return size, firstnode, mapping

def debugdatapack(ui, *paths, **opts):
    """print the contents of the given datapack files, or the delta chain
    of a single node when --node is given"""
    for path in paths:
        if '.data' in path:
            path = path[:path.index('.data')]
        ui.write("%s:\n" % path)
        dpack = datapack.datapack(path)
        node = opts.get('node')
        if node:
            deltachain = dpack.getdeltachain('', bin(node))
            dumpdeltachain(ui, deltachain, **opts)
            return

        if opts.get('long'):
            hashformatter = hex
            hashlen = 42
        else:
            hashformatter = short
            hashlen = 14

        lastfilename = None
        totaldeltasize = 0
        totalblobsize = 0

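        # Emits the delta/blob size totals accumulated for the previous file
        # section, including how much smaller (or bigger) the deltas are than
        # the full blobs.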
        def printtotals():
            if lastfilename is not None:
                ui.write("\n")
            if not totaldeltasize or not totalblobsize:
                return
            difference = totalblobsize - totaldeltasize
            deltastr = "%0.1f%% %s" % (
                (100.0 * abs(difference) / totalblobsize),
                ("smaller" if difference > 0 else "bigger"))

            ui.write(("Total:%s%s %s (%s)\n") % (
                "".ljust(2 * hashlen - len("Total:")),
                str(totaldeltasize).ljust(12),
                str(totalblobsize).ljust(9),
                deltastr
            ))

        bases = {}
        nodes = set()
        failures = 0
        for filename, node, deltabase, deltalen in dpack.iterentries():
            bases[node] = deltabase
            if node in nodes:
                ui.write(("Bad entry: %s appears twice\n" % short(node)))
                failures += 1
            nodes.add(node)
            if filename != lastfilename:
                printtotals()
                name = '(empty name)' if filename == '' else filename
                ui.write("%s:\n" % name)
                ui.write("%s%s%s%s\n" % (
                    "Node".ljust(hashlen),
                    "Delta Base".ljust(hashlen),
                    "Delta Length".ljust(14),
                    "Blob Size".ljust(9)))
                lastfilename = filename
                totalblobsize = 0
                totaldeltasize = 0

            # Metadata could be missing, in which case it will be an empty dict.
            meta = dpack.getmeta(filename, node)
            if constants.METAKEYSIZE in meta:
                blobsize = meta[constants.METAKEYSIZE]
                totaldeltasize += deltalen
                totalblobsize += blobsize
            else:
                blobsize = "(missing)"
            ui.write("%s %s %s%s\n" % (
                hashformatter(node),
                hashformatter(deltabase),
                str(deltalen).ljust(14),
                blobsize))

        # checking lastfilename (rather than the loop variable) also covers
        # the case of a pack with no entries at all
        if lastfilename is not None:
            printtotals()

        failures += _sanitycheck(ui, set(nodes), bases)
        if failures:
            ui.warn(("%d failures\n" % failures))
            return 1

def _sanitycheck(ui, nodes, bases):
    """
    Does some basic sanity checking on a packfile with ``nodes`` and
    ``bases`` (a mapping of node->base):

    - Each deltabase must itself be a node elsewhere in the pack
    - There must be no cycles
    """
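    # For illustration, with a hypothetical two-entry pack where
    #   bases = {A: B, B: A}
    # walking A's chain goes A -> B -> A -> B, and the second visit to B
    # trips the ``deltabase in seen`` check below; a base that is missing
    # from ``nodes`` altogether is reported as an unknown deltabase instead.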
    failures = 0
    for node in nodes:
        seen = set()
        current = node
        deltabase = bases[current]

        while deltabase != nullid:
            if deltabase not in nodes:
                ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
                         (short(node), short(deltabase))))
                failures += 1
                break

            if deltabase in seen:
                ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
                         (short(node), short(deltabase))))
                failures += 1
                break

            current = deltabase
            seen.add(current)
            deltabase = bases[current]
        # Since ``node`` begins a valid chain, reset/memoize its base to nullid
        # so we don't traverse it again.
        bases[node] = nullid
    return failures

def dumpdeltachain(ui, deltachain, **opts):
    """print each entry of a delta chain: its node, delta base, delta SHA1
    and delta length"""
    hashformatter = hex
    hashlen = 40

    lastfilename = None
    # chain entries are (filename, node, deltabasename, deltabasenode, delta)
    for filename, node, deltabasename, deltabasenode, delta in deltachain:
        if filename != lastfilename:
            ui.write("\n%s\n" % filename)
            lastfilename = filename
        ui.write("%s %s %s %s\n" % (
            "Node".ljust(hashlen),
            "Delta Base".ljust(hashlen),
            "Delta SHA1".ljust(hashlen),
            "Delta Length".ljust(6),
        ))

        ui.write("%s %s %s %s\n" % (
            hashformatter(node),
            hashformatter(deltabasenode),
            hashlib.sha1(delta).hexdigest(),
            len(delta)))

def debughistorypack(ui, path):
    """print the history entries stored in a historypack file"""
    if '.hist' in path:
        path = path[:path.index('.hist')]
    hpack = historypack.historypack(path)

    lastfilename = None
    for entry in hpack.iterentries():
        filename, node, p1node, p2node, linknode, copyfrom = entry
        if filename != lastfilename:
            ui.write("\n%s\n" % filename)
            ui.write("%s%s%s%s%s\n" % (
                "Node".ljust(14),
                "P1 Node".ljust(14),
                "P2 Node".ljust(14),
                "Link Node".ljust(14),
                "Copy From"))
            lastfilename = filename
        ui.write("%s %s %s %s %s\n" % (short(node), short(p1node),
                                       short(p2node), short(linknode), copyfrom))

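# Acquiring these locks with wait=True (the third _lock argument) blocks
# until any background repack/prefetch that holds them has finished, so the
# two commands below act as simple synchronization points (e.g. for tests).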
def debugwaitonrepack(repo):
    with repo._lock(repo.svfs, "repacklock", True, None,
                    None, _('repacking %s') % repo.origroot):
        pass

def debugwaitonprefetch(repo):
    with repo._lock(repo.svfs, "prefetchlock", True, None,
                    None, _('prefetching in %s') % repo.origroot):
        pass