sqldirstate: the extension

Summary:
An extension replacing the dirstate file with a sqlite database so we can have incremental changes and we don't have to read the whole dirstate on every op. This makes sense only when hgwatchman/fsmonitor is on so we don't iterate through the whole dirstate.
This is also using the sqlite transactions to handle dirstate transactions instead of copying db around. As a result of that "hg rollback" doesn't work anymore. You can fall back to copying things by setting sqldirstate.skipbackups to False.

Needs those to go to upstream to work: https://phabricator.intern.facebook.com/P56319612
(will send them once the freeze is over)

To use make sure that the extension is loaded *before* hgwatchman (watchman
should be the outermost layer).

Test Plan:
Passing all but few mercurial tests (when running with skipbackups=False)
The failures are described in blacklist file.

Reviewers: lcharignon, wez, quark, durham

Reviewed By: durham

Subscribers: laurent, mjpieters, #sourcecontrol

Differential Revision: https://phabricator.intern.facebook.com/D3242547

Signature: t1:3242547:1462577481:fdbfb5287fb8d3e58f7b4d587c01de79ce6b78df
This commit is contained in:
Mateusz Kwapich 2016-05-06 16:56:45 -07:00
parent b3334a9d5c
commit 440d3ebb04
5 changed files with 787 additions and 0 deletions

99
sqldirstate/__init__.py Normal file
View File

@ -0,0 +1,99 @@
# __init__.py - sqldirstate extension
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
testedwith = 'internal'
from sqldirstate import makedirstate, DBFILE, toflat, tosql
from mercurial import error, cmdutil, localrepo, util
from mercurial.extensions import wrapfunction
def wrapfilecache(cls, propname, wrapper, *paths):
    """Wraps a filecache property. These can't be wrapped using the normal
    wrapfunction. This should eventually go into upstream Mercurial.

    cls is searched along its MRO for the class owning the filecache
    descriptor; the descriptor's underlying function is replaced by a
    closure invoking ``wrapper(origfn, ...)`` and its tracked paths are
    replaced with *paths*.

    Raises AttributeError if no class in the MRO defines propname.
    """
    assert callable(wrapper)
    for currcls in cls.__mro__:
        if propname in currcls.__dict__:
            origfn = currcls.__dict__[propname].func
            assert callable(origfn)

            def wrap(*args, **kwargs):
                return wrapper(origfn, *args, **kwargs)
            currcls.__dict__[propname].func = wrap
            currcls.__dict__[propname].paths = paths
            break
        if currcls is object:
            # BUGFIX: the original called the i18n helper _() here, but _
            # is never imported in this module, so this error path raised
            # NameError instead of the intended AttributeError.
            raise AttributeError(
                "type '%s' has no property '%s'" % (cls, propname))
def wrapjournalfiles(orig, self):
    """Adjust the journal file list for sqldirstate repos.

    For repos with the 'sqldirstate' requirement, drop the flat
    'journal.dirstate' entry (there is no flat dirstate file to journal)
    and, unless skipbackups is set, add the sqlite journal instead.
    Other repos are passed through untouched.
    """
    usesql = (util.safehasattr(self, 'requirements') and
              'sqldirstate' in self.requirements)
    if not usesql:
        return orig(self)
    files = tuple((vfs, name) for vfs, name in orig(self)
                  if name != 'journal.dirstate')
    if not self.ui.configbool('sqldirstate', 'skipbackups', True):
        files += ((self.vfs, 'journal.dirstate.sqlite3'),)
    return files
def wrapdirstate(orig, self):
    """Upgrade the repo's dirstate object to its sql-backed variant.

    When the repo carries the 'sqldirstate' requirement, the freshly
    created dirstate instance is re-classed via makedirstate() and its
    sqlite connection is initialized.
    """
    ds = orig(self)
    hasreqs = util.safehasattr(self, 'requirements')
    if hasreqs and 'sqldirstate' in self.requirements:
        ds.__class__ = makedirstate(ds.__class__)
        ds._sqlinit()
    return ds
def wrapnewreporequirements(orig, repo):
    """Add 'sqldirstate' to new-repo requirements when the format is on.

    Controlled by the format.sqldirstate config knob (default True).
    """
    requirements = orig(repo)
    enabled = repo.ui.configbool('format', 'sqldirstate', True)
    if enabled:
        requirements.add('sqldirstate')
    return requirements
# Advertise 'sqldirstate' as a supported requirement so repositories that
# were created with it can be opened at all.
cls = localrepo.localrepository
cls._basesupported.add('sqldirstate')
def uisetup(ui):
    """Install all sqldirstate wrappers at extension-load time."""
    repocls = localrepo.localrepository
    wrapfunction(localrepo, 'newreporequirements',
                 wrapnewreporequirements)
    wrapfunction(repocls, '_journalfiles', wrapjournalfiles)
    wrapfilecache(repocls, 'dirstate', wrapdirstate, DBFILE)
# debug commands -- populated by the @command decorator below
cmdtable = {}
command = cmdutil.command(cmdtable)
@command('debugsqldirstate', [], 'hg debugsqldirstate [on|off]')
def debugsqldirstate(ui, repo, cmd, **opts):
    """migrate between flat dirstate and sqldirstate

    'on' converts the flat dirstate file into the sqlite database and
    records the requirement; 'off' converts back and removes it.
    """
    if cmd == "on":
        if 'sqldirstate' in repo.requirements:
            raise error.Abort("sqldirstate is already enabled")
        # materialize the flat dirstate before converting it
        repo.dirstate._read()
        tosql(repo.dirstate)
        repo.requirements.add('sqldirstate')
        repo._writerequirements()
        repo.dirstate._opener.unlink('dirstate')
    elif cmd == "off":
        if 'sqldirstate' not in repo.requirements:
            raise error.Abort("sqldirstate is disabled")
        toflat(repo.dirstate)
        repo.requirements.remove('sqldirstate')
        repo._writerequirements()
        repo.dirstate._opener.unlink('dirstate.sqlite3')
    else:
        # BUGFIX: previously any argument other than on/off was silently
        # ignored; report it instead.
        raise error.Abort("unknown command '%s'" % cmd)

461
sqldirstate/sqldirstate.py Normal file
View File

@ -0,0 +1,461 @@
# dirstate.py - sqlite backed dirstate
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""
dirstate class replacement with sqlite backed storage
This allows us to make incremental changes to we don't have to read the whole
dirstate on every operation. This makes sense only when fsmonitor is on so we
don't iterate through whole dirstate.
sqldirstate stores the data in unnormalized form to avoid reading whole dirstate
to generate data like flat dirstate does.
It is using the sqlite transactions to handle dirstate transactions instead of
copying db around. As a result of that "hg rollback" doesn't work anymore. You
can fall back to copying things by setting sqldirstate.skipbackups to False.
We commit sql transaction only when normal dirstate write would happen.
"""
import os
import sqlite3
from mercurial import dirstate, parsers, util
from mercurial.node import nullid, hex, bin
from mercurial.util import propertycache
from sqlmap import sqlmap
# dirstate entry type from the C parsers module: (status, mode, size, mtime)
dirstatetuple = parsers.dirstatetuple

# filename of the sqlite database, relative to the dirstate opener (.hg/)
DBFILE = "dirstate.sqlite3"
def createotherschema(sqlconn):
    """Create the 'other' table: storage for all misc small key/value data
    (schema version, working dir parents), and stamp the schema version."""
    cur = sqlconn.cursor()
    cur.execute('''CREATE TABLE IF NOT EXISTS other (
        key BLOB PRIMARY KEY,
        value BLOB NOT NULL)
        ''')
    cur.close()
    sqlconn.commit()
    cur = sqlconn.cursor()
    cur.execute('''INSERT OR REPLACE INTO other (key, value) VALUES
        ("schema_version", 1)''')
    cur.close()
    sqlconn.commit()
def dropotherschema(sqlconn):
    """Drop the 'other' key/value table if it exists and commit."""
    sqlconn.execute('''DROP TABLE IF EXISTS other''')
    sqlconn.commit()
class sqldirstatemap(sqlmap):
    """The main map - reflects the original dirstate file contents.

    Maps filename -> dirstatetuple(status, mode, size, mtime).
    """
    _tablename = 'files'
    _keyname = 'filename'
    _valuenames = ['status', 'mode', 'size', 'mtime']

    def createschema(self):
        """Create the 'files' table and its mtime index if missing."""
        cur = self._sqlconn.cursor()
        cur.execute('''CREATE TABLE IF NOT EXISTS files (
            filename BLOB PRIMARY KEY,
            status BLOB NOT NULL,
            mode INTEGER NOT NULL,
            size INTEGER NOT NULL,
            mtime INTEGER NOT NULL)
            ''')
        # index speeds up the mtime scans done by nonnormalentries()/resetnow()
        cur.execute('''CREATE INDEX IF NOT EXISTS
            files_mtime ON files(mtime);''')
        cur.close()
        self._sqlconn.commit()

    def dropschema(self):
        """Drop the 'files' table."""
        cur = self._sqlconn.cursor()
        cur.execute('''DROP TABLE IF EXISTS files''')
        cur.close()
        self._sqlconn.commit()

    def _rowtovalue(self, row):
        # db row -> dirstatetuple(status, mode, size, mtime)
        return dirstatetuple(*row)

    def _valuetorow(self, value):
        # dirstatetuple -> plain 4-tuple for sqlite binding
        return (value[0], value[1], value[2], value[3])

    def nonnormalentries(self):
        """Return the set of filenames that must be re-checked on status."""
        cur = self._sqlconn.cursor()
        # -1 means that we should check the file on next status
        cur.execute('''SELECT filename FROM files
            WHERE status != 'n' or mtime = -1''')
        rows = cur.fetchall()
        cur.close()
        return set(row[0] for row in rows)

    def otherparententries(self):
        """Yield (filename, entry) for files from the merge's other parent."""
        cur = self._sqlconn.cursor()
        # -2 means that file is coming from the other parent of the merge
        # it's always dirty
        cur.execute('''SELECT filename, status, mode, size, mtime FROM files '''
                    '''WHERE status = 'n' and size = -2;''')
        for r in cur:
            yield (r[0], self._rowtovalue(r[1:]))
        cur.close()

    def modifiedentries(self):
        """Yield (filename, entry) for files in 'm' (merged) state."""
        cur = self._sqlconn.cursor()
        cur.execute('''SELECT filename, status, mode, size, mtime FROM files '''
                    '''WHERE status = 'm';''')
        for r in cur:
            yield (r[0], self._rowtovalue(r[1:]))
        cur.close()

    def resetnow(self, now):
        """Mark entries whose mtime equals *now* as needing a re-check.

        Files written in the same second as the dirstate can't be trusted
        by mtime comparison, so their mtime is reset to the -1 sentinel.
        """
        cur = self._sqlconn.cursor()
        cur.execute('''UPDATE files SET mtime = -1
            WHERE status = 'n' and mtime = ?''', (now,))
        cur.close()
class sqlcopymap(sqlmap):
    """All copy information in the dirstate: maps dest -> source."""
    _tablename = 'copymap'
    _keyname = 'dest'
    _valuenames = ['source']

    def createschema(self):
        self._sqlconn.execute('''CREATE TABLE IF NOT EXISTS copymap(
            dest BLOB PRIMARY KEY,
            source BLOB NOT NULL)
            ''')
        self._sqlconn.commit()

    def dropschema(self):
        self._sqlconn.execute('''DROP TABLE IF EXISTS copymap''')
        self._sqlconn.commit()
class sqlfilefoldmap(sqlmap):
    """Persistent normalized-name -> real-name map for files.

    A normal dirstate generates this map on the fly from the whole
    dirstate; keeping it in sqlite avoids reading all of it.
    """
    _tablename = 'filefoldmap'
    _keyname = 'normed'
    _valuenames = ['real']

    def createschema(self):
        self._sqlconn.execute('''CREATE TABLE IF NOT EXISTS filefoldmap (
            normed BLOB PRIMARY KEY,
            real BLOB NOT NULL)
            ''')
        self._sqlconn.commit()

    def dropschema(self):
        self._sqlconn.execute('''DROP TABLE IF EXISTS filefoldmap''')
        self._sqlconn.commit()
class sqldirfoldmap(sqlmap):
    """Persistent normalized-name -> real-name map for directories.

    A normal dirstate generates this map on the fly from the whole
    dirstate; keeping it in sqlite avoids reading all of it.
    """
    _tablename = 'dirfoldmap'
    _keyname = 'normed'
    _valuenames = ['real']

    def createschema(self):
        self._sqlconn.execute('''CREATE TABLE IF NOT EXISTS dirfoldmap (
            normed BLOB PRIMARY KEY,
            real BLOB NOT NULL)
            ''')
        self._sqlconn.commit()

    def dropschema(self):
        self._sqlconn.execute('''DROP TABLE IF EXISTS dirfoldmap''')
        self._sqlconn.commit()
class sqldirsdict(sqlmap):
    """Directory -> tracked-file-count map backing sqldirs below.

    Persisted in sqlite so it does not have to be rebuilt by reading
    the whole dirstate.
    """
    _tablename = 'dirs'
    _keyname = 'dir'
    _valuenames = ['count']

    def createschema(self):
        self._sqlconn.execute('''CREATE TABLE IF NOT EXISTS dirs(
            dir BLOB PRIMARY KEY,
            count INT NOT NULL)
            ''')
        self._sqlconn.commit()

    def dropschema(self):
        self._sqlconn.execute('''DROP TABLE IF EXISTS dirs''')
        self._sqlconn.commit()
class sqldirs(object):
    """Reimplementation of util.dirs on top of the 'dirs' sql table.

    util.dirs itself is not reusable here because it is replaced by C
    code when available; with a small upstream change we could probably
    reuse it.  The addpath/delpath logic mirrors util.py.
    """
    def __init__(self, sqlconn, skip=None, filemap=None):
        self._dirs = sqldirsdict(sqlconn)
        if filemap:
            for path, entry in filemap.iteritems():
                self.addpath(path)

    def addpath(self, path):
        # bump the count of every ancestor dir; once an ancestor already
        # exists, all of its ancestors do too, so stop there
        counts = self._dirs
        for base in util.finddirs(path):
            if base in counts:
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        # decrement ancestor counts, dropping dirs that reach zero
        counts = self._dirs
        for base in util.finddirs(path):
            if counts[base] > 1:
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return self._dirs.iterkeys()

    def __contains__(self, d):
        return d in self._dirs

    def clear(self):
        self._dirs.clear()
def makedirstate(cls):
    """Return a subclass of *cls* (a dirstate class) backed by sqlite.

    All dirstate maps live in the sqlite database instead of the flat
    dirstate file; _sqlinit() must be called on an instance before use.
    Dirstate transactions map onto sql transactions: write == commit,
    invalidate == rollback.
    """
    class sqldirstate(cls):
        def _sqlinit(self):
            """Open the sqlite db and attach the sql-backed maps."""
            self._sqlfilename = self._opener.join(DBFILE)
            self._sqlconn = sqlite3.connect(self._sqlfilename)
            # return bytestrings, not unicode, to match dirstate expectations
            self._sqlconn.text_factory = str
            createotherschema(self._sqlconn)
            self._map = sqldirstatemap(self._sqlconn)
            self._dirs = sqldirs(self._sqlconn)
            self._copymap = sqlcopymap(self._sqlconn)
            self._filefoldmap = sqlfilefoldmap(self._sqlconn)
            self._dirfoldmap = sqldirfoldmap(self._sqlconn)
            self.skipbackups = self._ui.configbool('sqldirstate', 'skipbackups',
                                                   True)

        def _read(self):
            # nothing to read eagerly - the maps query sqlite lazily
            pass

        @propertycache
        def _pl(self):
            """Working directory parents, read from the 'other' table.

            Defaults to [nullid, nullid] when no parents are recorded.
            """
            p1 = p2 = hex(nullid)
            cur = self._sqlconn.cursor()
            cur.execute('''SELECT key, value FROM other
                WHERE key='p1' or key='p2' ''')
            rows = cur.fetchall()
            for r in rows:
                if r[0] == 'p1':
                    p1 = r[1]
                if r[0] == 'p2':
                    p2 = r[1]
            cur.close()
            return [bin(p1), bin(p2)]

        def __setattr__(self, key, value):
            if key == '_pl':
                # because other methods in dirstate are setting it directly
                # instead of using setparents - persist parents to sql here
                p1 = value[0]
                p2 = value[1]
                cur = self._sqlconn.cursor()
                cur.executemany('''INSERT OR REPLACE INTO
                    other (key, value) VALUES (?, ?)''',
                    [('p1', hex(p1)), ('p2', hex(p2))])
                cur.close()
                # bypass propertycache by writing the instance dict directly
                self.__dict__['_pl'] = value
            else:
                return super(sqldirstate, self).__setattr__(key, value)

        def savebackup(self, tr, suffix="", prefix=""):
            # with skipbackups the sql transaction provides rollback instead
            if self.skipbackups:
                return
            self._writesqldirstate()
            util.copyfile(self._opener.join(DBFILE),
                          self._opener.join(prefix + DBFILE + suffix))

        def restorebackup(self, tr, suffix="", prefix=""):
            if self.skipbackups:
                return
            self._opener.rename(prefix + DBFILE + suffix, DBFILE)
            self.invalidate()

        def clearbackup(self, tr, suffix="", prefix=""):
            if self.skipbackups:
                return
            self._opener.unlink(prefix + DBFILE + suffix)

        @propertycache
        def _nonnormalset(self):
            # files needing a status re-check, computed in sql
            return self._map.nonnormalentries()

        def invalidate(self):
            # discard pending changes via sql rollback, then reset all
            # cached state and reattach the maps
            self._sqlconn.rollback()
            for a in ("_branch", "_pl", "_ignore", "_nonnormalset"):
                if a in self.__dict__:
                    delattr(self, a)
            self._lastnormaltime = 0
            self._dirty = False
            self._parentwriters = 0
            self._sqlinit()

        def write(self, tr=False):
            # if dirty dump to disk (db transaction commit)
            if not self._dirty:
                return
            now = dirstate._getfsnow(self._opener)
            self._map.resetnow(now)
            if tr:
                # defer the actual commit to transaction finalization
                tr.addfinalize("sqldirstate.write", self._backupandwrite)
                return
            self._writesqldirstate()

        def _writedirstate(self, st):
            # flat-file write entry point; st (the file handle) is unused
            self._writesqldirstate()

        def _writesqldirstate(self):
            # if dirty dump to disk (db transaction commit)
            now = dirstate._getfsnow(self._opener)
            self._map.resetnow(now)
            self._sqlconn.commit()
            self._lastnormaltime = 0
            self._dirty = self._dirtypl = False
            self._nonnormalset = self._map.nonnormalentries()

        def _backupandwrite(self, tr):
            # NOTE(review): uses the private tr._addbackupentry API
            if not self.skipbackups:
                backuppath = self._opener.join('%s.%s' % (tr.journal, DBFILE))
                util.copyfile(self._sqlfilename, backuppath)
                tr._addbackupentry(('plain', self._sqlfilename,
                                    backuppath, False))
            self._writesqldirstate()

        def clear(self):
            # empty every map and reset parents to null
            self._map.clear()
            self._nonnormalset = set()
            self._dirs.clear()
            self._copymap.clear()
            self._filefoldmap.clear()
            self._dirfoldmap.clear()
            self._pl = [nullid, nullid]
            self._lastnormaltime = 0
            self._dirty = True

        def setparents(self, p1, p2=nullid):
            """Set dirstate parents to p1 and p2.

            When moving from two parents to one, 'm' merged entries are
            adjusted to normal and previous copy records discarded and
            returned by the call.

            See localrepo.setparents()
            """
            if self._parentwriters == 0:
                raise ValueError("cannot set dirstate parent without "
                                 "calling dirstate.beginparentchange")
            self._dirty = self._dirtypl = True
            oldp2 = self._pl[1]
            self._pl = p1, p2
            copies = {}
            if oldp2 != nullid and p2 == nullid:
                # Discard 'm' markers when moving away from a merge state
                for f, s in self._map.modifiedentries():
                    if f in self._copymap:
                        copies[f] = self._copymap[f]
                    self.normallookup(f)
                # Also fix up otherparent markers
                for f, s in self._map.otherparententries():
                    if f in self._copymap:
                        copies[f] = self._copymap[f]
                    self.add(f)
            return copies
    return sqldirstate
def tosql(dirstate):
    """Convert a flat dirstate into a fresh sqlite database.

    Any stale database file is removed first; all maps and the working
    directory parents are copied over, then committed in one go.
    """
    sqlfilename = dirstate._opener.join(DBFILE)
    try:
        os.unlink(sqlfilename)
    except OSError:
        # no previous database - nothing to remove
        pass
    sqlconn = sqlite3.connect(sqlfilename)
    sqlconn.text_factory = str
    createotherschema(sqlconn)
    filemap = sqldirstatemap(sqlconn)
    copies = sqlcopymap(sqlconn)
    foldmap = sqlfilefoldmap(sqlconn)
    dirfold = sqldirfoldmap(sqlconn)
    # constructing sqldirs with filemap populates the dirs table
    sqldirs(sqlconn, filemap=dirstate._map)
    for filename, entry in dirstate._map.iteritems():
        filemap[filename] = entry
    for dest, source in dirstate._copymap.iteritems():
        copies[dest] = source
    for normed, real in dirstate._filefoldmap.iteritems():
        foldmap[normed] = real
    for normed, real in dirstate._dirfoldmap.iteritems():
        dirfold[normed] = real
    cur = sqlconn.cursor()
    cur.executemany('''INSERT OR REPLACE INTO
        other (key, value)
        VALUES (?, ?)''',
        [('p1', hex(dirstate.p1())), ('p2', hex(dirstate.p2()))]
    )
    cur.close()
    sqlconn.commit()
def toflat(sqldirstate):
    """Write a sqldirstate's contents out as a flat dirstate file."""
    st = sqldirstate._opener("dirstate", "w", atomictemp=True)
    # materialize the sql-backed maps into plain dicts for pack_dirstate
    dmap = dict(sqldirstate._map.iteritems())
    cmap = dict(sqldirstate._copymap.iteritems())
    st.write(parsers.pack_dirstate(dmap, cmap, sqldirstate._pl,
                                   dirstate._getfsnow(sqldirstate._opener)))
    st.close()

121
sqldirstate/sqlmap.py Normal file
View File

@ -0,0 +1,121 @@
# sqlmap.py - sql backed dictionary
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from abc import abstractmethod, ABCMeta
import collections
from mercurial import parsers
# dirstate entry type from the C parsers module: (status, mode, size, mtime)
dirstatetuple = parsers.dirstatetuple
class sqlmap(collections.MutableMapping):
    """A dictionary-like object backed by a sqlite db.

    Subclasses define _tablename, _keyname and _valuenames and implement
    createschema()/dropschema().  Multi-column values are converted via
    _rowtovalue()/_valuetorow().  Writes are NOT committed here; callers
    own the transaction on the shared connection.
    """
    # NOTE(review): collections.MutableMapping is the py2-era spelling;
    # modern Python requires collections.abc.MutableMapping.
    __metaclass__ = ABCMeta

    def __init__(self, sqlconn):
        # the connection is shared with the other maps; schema is created
        # lazily on first use
        self._sqlconn = sqlconn
        self.createschema()

    @abstractmethod
    def createschema(self):
        """ create db table if doesn't exist """
        pass

    @abstractmethod
    def dropschema(self):
        """ drop db table """
        pass

    def _rowtovalue(self, row):
        """ converts row of db to a value format """
        return row[0]

    def _valuetorow(self, value):
        """ converts provided value to db row format """
        return (value,)

    @property
    def _numcols(self):
        # key column + value columns
        return 1 + len(self._valuenames)

    @property
    def _valuenamesstr(self):
        # comma-joined value column names for SQL interpolation
        return ', '.join(self._valuenames)

    def __setitem__(self, key, item):
        cur = self._sqlconn.cursor()
        item = self._valuetorow(item)
        # table/column names are class constants, so format() is safe here;
        # key and values are bound as parameters
        cur.execute('''INSERT OR REPLACE INTO
            {0} ({1}, {2})
            VALUES ({3})'''.format(
                self._tablename, self._keyname, self._valuenamesstr,
                ', '.join(['?'] * self._numcols)),
            (key,) + item)
        cur.close()

    def __getitem__(self, key):
        cur = self._sqlconn.cursor()
        cur.execute('''SELECT {2} FROM {0} WHERE {1}=?'''.format(
            self._tablename, self._keyname, self._valuenamesstr), (key,))
        row = cur.fetchone()
        cur.close()
        if row is None:
            raise KeyError("key %s not found" % key)
        return self._rowtovalue(row)

    def __delitem__(self, key):
        cur = self._sqlconn.cursor()
        cur.execute('''DELETE FROM {0} WHERE {1}=?'''.format(
            self._tablename, self._keyname), (key,))
        # rowcount of 0 means nothing matched - mimic dict semantics
        if cur.rowcount == 0:
            raise KeyError("key %s not found" % key)
        cur.close()

    def __len__(self):
        cur = self._sqlconn.cursor()
        cur.execute('''SELECT COUNT(*) FROM {0}'''.format(self._tablename))
        res = cur.fetchone()
        cur.close()
        return res[0]

    def clear(self):
        cur = self._sqlconn.cursor()
        cur.execute('''DELETE FROM {0}'''.format(self._tablename))
        cur.close()

    def copy(self):
        # materialize into a plain in-memory dict
        return dict(self)

    def keys(self):
        cur = self._sqlconn.cursor()
        cur.execute('''SELECT {1} FROM {0}'''.format(self._tablename,
                                                     self._keyname))
        keys = cur.fetchall()
        cur.close()
        return [k[0] for k in keys]

    def __iter__(self):
        # generator: the cursor is only closed once fully exhausted
        cur = self._sqlconn.cursor()
        cur.execute('''SELECT {1} FROM {0}'''.format(self._tablename,
                                                     self._keyname))
        for r in cur:
            yield r[0]
        cur.close()

    def iteritems(self):
        # generator: the cursor is only closed once fully exhausted
        cur = self._sqlconn.cursor()
        cur.execute('''SELECT {1}, {2} from {0}'''.format(self._tablename,
                                                          self._keyname,
                                                          self._valuenamesstr))
        for r in cur:
            yield (r[0], self._rowtovalue(r[1:]))
        cur.close()

View File

@ -0,0 +1,47 @@
# non-significant output changes
test-empty.t
test-basic.t
test-largefiles-small-disk.t
test-debugextensions.t
test-inherit-mode.t
test-init.t
test-completion.t
test-lfconvert.t
test-fncache.t
test-clonebundles.t
test-ssh.t
test-ssh-bundle1.t
test-globalopts.t
test-extension.t
test-hgweb-commands.t
test-commandserver.t
test-help.t
# zeroconf
test-paths.t
# tests changing hgrc after initializing repos
# and failing because of the not met requirements.
test-hgrc.t
test-hardlinks.t
test-keyword.t
test-treemanifest.t
test-wireproto.t
test-run-tests.t
# tests chmoding -w .hg directory
test-eol.t
test-phases-exchange.t
test-issue3084.t
# Legit failures:
# bundlerepos don't work yet.
# test-bundle.t
# test-shelve.t
# test-import.t
# the wd parent is wrong
# test-obsolete.t
# tries to open a sqlite db for http repo
# test-static-http.t

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
import optparse
import os
import subprocess
import sys
# PassThroughOptionParse is from the Optik source distribution, (c) 2001-2006
# Gregory P. Ward. Used under the BSD license.
class PassThroughOptionParser(optparse.OptionParser):
    """An OptionParser that keeps unrecognized options as leftover args
    (self.largs) instead of erroring out, so they can be forwarded to
    run-tests.py."""

    def _process_long_opt(self, rargs, values):
        try:
            optparse.OptionParser._process_long_opt(self, rargs, values)
        except optparse.BadOptionError as badopt:
            self.largs.append(badopt.opt_str)

    def _process_short_opts(self, rargs, values):
        try:
            optparse.OptionParser._process_short_opts(self, rargs, values)
        except optparse.BadOptionError as badopt:
            self.largs.append(badopt.opt_str)
def parseargs(argv):
    """Parse this script's own options; everything unrecognized is kept
    in the returned args list for forwarding to run-tests.py.

    Exits with a usage error if --hg is not given.
    """
    parser = PassThroughOptionParser(
        usage='%prog [options]',
        epilog='Any additional options and arguments are passed through to '
        'REPO/tests/run-tests.py.')
    parser.add_option('--hg', type='string', metavar='REPO',
                      help='Mercurial repository to run tests against')
    parser.add_option('--disable-blacklist', action='store_true',
                      default=False,
                      help='disable default test blacklist')
    options, args = parser.parse_args(argv)
    if not options.hg:
        parser.error('Mercurial repository not specified')
    return options, args
def main(argv):
    """Run the Mercurial test suite with sqldirstate force-enabled.

    Returns run-tests.py's exit code.
    """
    options, args = parseargs(argv)
    thisdir = os.path.dirname(os.path.realpath(__file__))
    extroot = os.path.join(os.path.dirname(thisdir), 'sqldirstate')
    # enable the extension and the repo format for every test repo
    extopts = ['--extra-config-opt', 'extensions.sqldirstate=%s' % extroot,
               '--extra-config-opt', 'sqldirstate.skipbackups=False',
               '--extra-config-opt', 'format.sqldirstate=True']
    if not options.disable_blacklist:
        extopts += ['--blacklist',
                    os.path.join(thisdir, 'blacklist-sqldirstate')]
    cwd = os.path.expanduser(os.path.join(options.hg, 'tests'))
    cmd = [os.path.join(cwd, 'run-tests.py')] + extopts + args
    return subprocess.call(cmd, cwd=cwd)

if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))