sapling/hgext3rd/fastpartialmatch.py
Stanislau Hlebik 144ba285fb fastpartialmatch: initial commit
Summary:
Add an index that makes partial node matching fast, i.e. speeds up the lookup of partial hashes such as f9bbd5.

This is an initial diff and it doesn't add any useful functionality, just debug commands.

Storage format is simple. There are a few files (at most 256).
Each file contains entries:



  <20-byte node hash><4 byte encoded rev>


Each entry represents a commit (node).
Name of the file is the first two letters of the hex node hash. Nodes with the
same first two letters go to the same file. Nodes are NOT sorted inside the file
to make appends of new nodes easier.
The partial index should always be correct, i.e. it should contain only nodes
that are present in the repo (regardless of whether they are visible or not),
and the rev numbers stored for those nodes should be correct too.

Test Plan: arc unit

Reviewers: #sourcecontrol

Subscribers: mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D4474642
2017-02-02 05:12:10 -08:00

157 lines
4.7 KiB
Python

'''extension that makes node prefix lookup faster
Storage format is simple. There are a few files (at most 256).
Each file contains entries:
<20-byte node hash><4 byte encoded rev>
Name of the file is the first two letters of the hex node hash. Nodes with the
same first two letters go to the same file. Nodes are NOT sorted inside the file
to make appends of new nodes easier.
Partial index should always be correct i.e. it should contain only nodes that
are present in the repo (regardless of whether they are visible or not) and
rev numbers for nodes should be correct too.
'''
from mercurial import (
cmdutil,
error,
scmutil,
)
from mercurial.i18n import _
from mercurial.node import (
hex,
)
import os
import struct
# Module-level alias for Mercurial's error.LookupError. Note that this shadows
# the builtin LookupError for everything defined in this module.
LookupError = error.LookupError
# cmdtable is handed to cmdutil.command(); the callable it returns is used as
# the @command decorator on the debug commands defined in this file.
cmdtable = {}
command = cmdutil.command(cmdtable)
# Name of the directory holding the index files. Every use in this file
# resolves it through repo.svfs.
_partialindexdir = 'partialindex'
# Rev numbers are serialized with struct format '!L': a 4-byte unsigned
# integer in network (big-endian) byte order.
_packstruct = struct.Struct('!L')
# Length in bytes of the binary node hash stored at the start of each entry.
_nodesize = 20
# Total on-disk size of one entry: <node hash><packed rev>.
_entrysize = _nodesize + _packstruct.size
@command('^debugprintpartialindexfile', [])
def debugprintpartialindexfile(ui, repo, *args):
    '''Parses and prints partial index files
    '''
    # Every positional argument names a file inside the partial index
    # directory. Each entry of an existing file is printed as
    # "<hex node> <rev>"; a missing file only produces a warning.
    if not args:
        raise error.Abort(_('please specify a filename'))
    svfs = repo.svfs
    for name in args:
        indexpath = os.path.join(_partialindexdir, name)
        if svfs.exists(indexpath):
            for binnode, revnum in _readallentries(svfs, indexpath):
                ui.write('%s %d\n' % (hex(binnode), revnum))
        else:
            ui.warn(_('file %s does not exist\n') % name)
@command('^debugrebuildpartialindex', [])
def debugrebuildpartialindex(ui, repo):
    '''Rebuild partial index from scratch
    '''
    # Thin command wrapper around the shared rebuild helper. No nodes are
    # excluded, so skiphexnodes keeps its default value.
    _rebuildpartialindex(ui, repo, skiphexnodes=None)
@command('^debugcheckpartialindex', [])
def debugcheckfastpartialindex(ui, repo):
    '''Command to check that partial index is consistent

    It checks that revision numbers are correct and checks that partial index
    has all the nodes from the repo.
    '''
    # Returns 0 when no inconsistency was found and 1 otherwise. Every
    # problem is reported through ui.warn() and the scan keeps going, so a
    # single run lists all detected issues.

    # Without this guard listdir() below would raise on a missing directory
    # and the user would get a traceback instead of a diagnostic.
    if not repo.svfs.exists(_partialindexdir):
        ui.warn(_('partial index is not built\n'))
        return 1

    indexvfs = scmutil.vfs(repo.svfs.join(_partialindexdir))
    foundnodes = set()
    # Use unfiltered repo because index may have entries that point to hidden
    # commits
    ret = 0
    repo = repo.unfiltered()
    for entry in indexvfs.listdir():
        # Index files are named after the first two hex digits of the node;
        # anything else in the directory is ignored.
        if len(entry) != 2 or not indexvfs.isfile(entry):
            continue
        try:
            for node, actualrev in _readallentries(indexvfs, entry):
                try:
                    expectedrev = repo.changelog.rev(node)
                except error.LookupError:
                    # The index must only contain nodes present in the
                    # repo. Report the stale entry rather than aborting
                    # the whole check.
                    ret = 1
                    ui.warn(_('corrupted index: node %s is not present '
                              'in the repo\n') % hex(node))
                    continue
                foundnodes.add(node)
                if expectedrev != actualrev:
                    ret = 1
                    ui.warn(_('corrupted index: rev number for %s ' +
                              'should be %d but found %d\n') %
                            (hex(node), expectedrev, actualrev))
        except ValueError as e:
            # _readallentries raises ValueError on a truncated entry.
            ret = 1
            ui.warn(_('%s file is corrupted: %s\n') % (entry, e))

    # Second pass: every node of the (unfiltered) repo must be indexed.
    for rev in repo:
        node = repo[rev].node()
        if node not in foundnodes:
            ret = 1
            ui.warn(_('%s node not found in partialindex\n') % hex(node))
    return ret
def _rebuildpartialindex(ui, repo, skiphexnodes=None):
    '''Rebuild the whole partial index from the unfiltered changelog.

    Any existing index directory (and any leftover temporary directory) is
    removed first. The new index is written into a temporary directory
    which is renamed to its final name only after every index file has been
    written and closed.

    ``skiphexnodes`` is an optional collection of hex node strings; nodes
    whose hex form is in it are left out of the index.
    '''
    ui.debug('rebuilding partial node index\n')
    repo = repo.unfiltered()
    if not skiphexnodes:
        skiphexnodes = set()
    vfs = repo.svfs
    tempdir = '.tmp' + _partialindexdir

    if vfs.exists(_partialindexdir):
        vfs.rmtree(_partialindexdir)
    if vfs.exists(tempdir):
        vfs.rmtree(tempdir)
    vfs.mkdir(tempdir)

    # Cache open file objects to not reopen the same files many times
    fileobjs = {}
    try:
        indexvfs = _getopener(vfs.join(tempdir))
        changelog = repo.changelog
        for rev in changelog:
            node = changelog.node(rev)
            hexnode = hex(node)
            if hexnode in skiphexnodes:
                continue
            # The first two hex digits of the node select the index file.
            filename = hexnode[:2]
            if filename not in fileobjs:
                fileobjs[filename] = indexvfs(filename, mode='a')
            _writeindexentry(fileobjs[filename], node, rev)
    finally:
        for fileobj in fileobjs.values():
            fileobj.close()

    # Publish the index only after all files are flushed and closed. Renaming
    # a directory that still contains open files fails on some platforms,
    # and an error raised by close() must prevent an incomplete index from
    # becoming visible. This line is not reached if anything above raised.
    vfs.rename(tempdir, _partialindexdir)
def _getopener(path):
    '''Return a scmutil.vfs for ``path`` with createmode set to 0o644.'''
    opener = scmutil.vfs(path)
    opener.createmode = 0o644
    return opener
def _writeindexentry(fileobj, node, rev):
    '''Write one ``<node><packed rev>`` entry to ``fileobj``.'''
    packedrev = _packstruct.pack(rev)
    fileobj.write(node + packedrev)
def _readallentries(vfs, file):
    '''Generate ``(node, rev)`` pairs for every entry stored in ``file``.

    ``file`` is opened through ``vfs`` and read entry by entry until
    _readindexentry() reports the end of the file.
    '''
    with vfs(file) as fileobj:
        while True:
            node, rev = _readindexentry(fileobj)
            if not node:
                break
            yield node, rev
def _readindexentry(fileobj):
    '''Read a single index entry from ``fileobj``.

    Returns ``(node, rev)``, or ``(None, None)`` when nothing is left to
    read. Raises ValueError if fewer than ``_entrysize`` bytes are available
    for the entry.
    '''
    entry = fileobj.read(_entrysize)
    if not entry:
        return None, None
    if len(entry) != _entrysize:
        raise ValueError(_('corrupted index'))
    # The entry layout is the node hash followed by the packed rev number.
    (revnum,) = _packstruct.unpack(entry[_nodesize:])
    return entry[:_nodesize], revnum