sapling/sqldirstate/sqldirstate.py
# sqldirstate.py - sqlite-backed dirstate
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""
dirstate class replacement with sqlite backed storage
This allows us to make incremental changes to we don't have to read the whole
dirstate on every operation. This makes sense only when fsmonitor is on so we
don't iterate through whole dirstate.
sqldirstate stores the data in unnormalized form to avoid reading whole dirstate
to generate data like flat dirstate does.
It'is using the sqlite transactions to handle dirstate transactions instead of
copying db around. As a result of that "hg rollback" doesn't work anymore. You
can fall back to copying things by setting sqldirstate.skipbackups to False.
We commit sql transaction only when normal dirstate write would happen.
"""
import os
import sqlite3
from mercurial import dirstate, parsers, util
from mercurial.node import nullid, hex, bin
from mercurial.util import propertycache
from sqlmap import sqlmap
from sqltrace import tracewrapsqlconn
dirstatetuple = parsers.dirstatetuple
DBFILE = "dirstate.sqlite3"
FAKEDIRSTATE = "dirstate"
SQLITE_CACHE_SIZE = -100000 # 100MB
SQL_SCHEMA_VERSION = 2
class SQLSchemaVersionUnsupported(RuntimeError):
def __init__(self, version):
self._version = version
def __str__(self):
return "sqldirstate schema version not supported (%s)" % self._version
def createotherschema(sqlconn):
""" The storage for all misc small key value data """
sqlconn.execute('''CREATE TABLE IF NOT EXISTS other (
key BLOB PRIMARY KEY,
value BLOB NOT NULL)
''')
sqlconn.commit()
def setversion(sqlconn, version):
    # note: the parameters must be a sequence; a bare string would be
    # treated as one parameter per character
    sqlconn.execute('''INSERT OR REPLACE INTO other (key, value) VALUES
                    ("schema_version", ?)''', (str(version),))
sqlconn.commit()
def getversion(sqlconn):
cur = sqlconn.cursor()
cur.execute('''SELECT value FROM other
WHERE key = "schema_version"''')
row = cur.fetchone()
cur.close()
if row is None:
setversion(sqlconn, SQL_SCHEMA_VERSION)
return SQL_SCHEMA_VERSION
else:
return int(row[0])
def dropotherschema(sqlconn):
cur = sqlconn.cursor()
cur.execute('''DROP TABLE IF EXISTS other''')
cur.close()
sqlconn.commit()
class sqldirstatemap(sqlmap):
""" the main map - reflects the original dirstate file contents """
_tablename = 'files'
_keyname = 'filename'
_valuenames = ['status', 'mode', 'size', 'mtime']
def createschema(self):
cur = self._sqlconn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS files (
filename BLOB PRIMARY KEY,
status BLOB NOT NULL,
mode INTEGER NOT NULL,
size INTEGER NOT NULL,
mtime INTEGER NOT NULL)
''')
        # The low cardinality of the status column makes it basically
        # non-indexable.
        # There is a feature we could use, available since sqlite 3.8,
        # called partial indexes. Using it we could index only the files
        # that have non-normal statuses.
        # The sqlite 3.6 that we are using on centos6 unfortunately doesn't
        # support it, so we are maintaining a separate nonnormalfiles table.
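        # For reference, on a newer sqlite the partial index would look
        # roughly like this (a sketch, not executed here):
        #   CREATE INDEX files_nonnormal ON files(filename)
        #       WHERE status != 'n' OR mtime = -1;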
cur.execute('''CREATE TABLE IF NOT EXISTS nonnormalfiles (
filename BLOB PRIMARY KEY,
status BLOB NOT NULL,
mode INTEGER NOT NULL,
size INTEGER NOT NULL,
mtime INTEGER NOT NULL)
''')
cur.execute('''CREATE INDEX IF NOT EXISTS
files_mtime ON files(mtime);''')
cur.close()
self._sqlconn.commit()
def dropschema(self):
cur = self._sqlconn.cursor()
cur.execute('''DROP TABLE IF EXISTS files''')
cur.execute('''DROP TABLE IF EXISTS nonnormalfiles''')
cur.close()
self._sqlconn.commit()
def _rowtovalue(self, row):
return dirstatetuple(*row)
def _valuetorow(self, value):
return (value[0], value[1], value[2], value[3])
def nonnormalentries(self):
cur = self._sqlconn.cursor()
# -1 means that we should check the file on next status
cur.execute('''SELECT filename FROM files
WHERE mtime = -1''')
rows = cur.fetchall()
cur.execute('''SELECT filename FROM nonnormalfiles''')
rows += cur.fetchall()
cur.close()
return set(row[0] for row in rows)
def otherparententries(self):
cur = self._sqlconn.cursor()
        # -2 means that the file comes from the other parent of the merge;
        # it's always dirty
cur.execute('''SELECT filename, status, mode, size, mtime FROM files '''
'''WHERE status = 'n' and size = -2;''')
for r in cur:
yield (r[0], self._rowtovalue(r[1:]))
cur.close()
def modifiedentries(self):
cur = self._sqlconn.cursor()
cur.execute('''SELECT filename, status, mode, size, mtime FROM '''
'''nonnormalfiles WHERE status = 'm';''')
for r in cur:
yield (r[0], self._rowtovalue(r[1:]))
cur.close()
def resetnow(self, now, nonnormalset=None):
cur = self._sqlconn.cursor()
cur.execute('''UPDATE files SET mtime = -1
WHERE mtime = ? and status = 'n' ''', (now,))
if self._lookupcache is not None:
for k, v in self._lookupcache.iteritems():
status, mode, size, mtime = v
if status == 'n' and mtime == now:
mtime = -1
self._lookupcache[k] = status, mode, size, mtime
if nonnormalset is not None:
nonnormalset.add(k)
cur.close()
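    # resetnow guards against the classic dirstate race: a file written
    # within the same second as 'now' can be modified again without its
    # mtime changing, so such entries get mtime -1 and are re-checked on
    # the next status.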
def __setitem__(self, key, item):
super(sqldirstatemap, self).__setitem__(key, item)
status = item[0]
cur = self._sqlconn.cursor()
if status != 'n':
item = self._valuetorow(item)
cur.execute('''INSERT OR REPLACE INTO nonnormalfiles ({keyname},
{valuenames}) VALUES ({placeholders})'''.format(
**self._querytemplateargs), (key,) + item)
else:
cur.execute('''DELETE FROM nonnormalfiles
WHERE {keyname}=?'''.format(
**self._querytemplateargs), (key,))
cur.close()
def _update(self, otherdict):
super(sqldirstatemap, self)._update(otherdict)
updatelist = list()
deletelist = list()
for key, item in otherdict.iteritems():
if item[0] != 'n':
updatelist.append((key,) + self._valuetorow(item))
else:
deletelist.append((key,))
cur = self._sqlconn.cursor()
cur.executemany('''INSERT OR REPLACE INTO nonnormalfiles
({keyname}, {valuenames}) VALUES ({placeholders})'''.format(
**self._querytemplateargs), updatelist)
cur.executemany('''DELETE FROM nonnormalfiles
WHERE {keyname}=?'''.format(
**self._querytemplateargs), deletelist)
cur.close()
class sqlcopymap(sqlmap):
""" all copy informations in dirstate """
_tablename = 'copymap'
_keyname = 'dest'
_valuenames = ['source']
def createschema(self):
cur = self._sqlconn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS copymap(
dest BLOB PRIMARY KEY,
source BLOB NOT NULL)
''')
cur.close()
self._sqlconn.commit()
def dropschema(self):
cur = self._sqlconn.cursor()
cur.execute('''DROP TABLE IF EXISTS copymap''')
cur.close()
self._sqlconn.commit()
class sqlfilefoldmap(sqlmap):
""" in normal dirstate this map is generated on-the-fly from
the dirstate. We are opting for persistent foldmap so we don't
have read the whole dirstate """
_tablename = 'filefoldmap'
_keyname = 'normed'
_valuenames = ['real']
def createschema(self):
cur = self._sqlconn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS filefoldmap (
normed BLOB PRIMARY KEY,
real BLOB NOT NULL)
''')
cur.close()
self._sqlconn.commit()
def dropschema(self):
cur = self._sqlconn.cursor()
cur.execute('''DROP TABLE IF EXISTS filefoldmap''')
cur.close()
self._sqlconn.commit()
class sqldirfoldmap(sqlmap):
""" in normal dirstate this map is generated on-the-fly from
the dirstate. We are opting for persistent foldmap so we don't
have read the whole dirstate """
_tablename = 'dirfoldmap'
_keyname = 'normed'
_valuenames = ['real']
def createschema(self):
cur = self._sqlconn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS dirfoldmap (
normed BLOB PRIMARY KEY,
real BLOB NOT NULL)
''')
cur.close()
self._sqlconn.commit()
def dropschema(self):
cur = self._sqlconn.cursor()
cur.execute('''DROP TABLE IF EXISTS dirfoldmap''')
cur.close()
self._sqlconn.commit()
class sqldirsdict(sqlmap):
""" in normal dirstate this map is generated on-the-fly from
the dirstate. We are opting for persistent foldmap so we don't
have read the whole dirstate """
_tablename = 'dirs'
_keyname = 'dir'
_valuenames = ['count']
def createschema(self):
cur = self._sqlconn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS dirs(
dir BLOB PRIMARY KEY,
count INT NOT NULL)
''')
cur.close()
self._sqlconn.commit()
def dropschema(self):
cur = self._sqlconn.cursor()
cur.execute('''DROP TABLE IF EXISTS dirs''')
cur.close()
self._sqlconn.commit()
class sqldirs(object):
""" Reimplementaion of util.dirs which is not resuseable because it's
replaced by c code if available. Probably with a small upstream
change we could reuse it """
def __init__(self, sqlconn, skip=None, filemap=None, dirsdict=None):
self._dirs = dirsdict
if self._dirs is None:
self._dirs = sqldirsdict(sqlconn)
if filemap:
for f, s in filemap.iteritems():
self.addpath(f)
# copied from util.py
def addpath(self, path):
dirs = self._dirs
for base in util.finddirs(path):
if base in dirs:
dirs[base] += 1
return
dirs[base] = 1
def delpath(self, path):
dirs = self._dirs
for base in util.finddirs(path):
if dirs[base] > 1:
dirs[base] -= 1
return
del dirs[base]
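    # Worked example: addpath('a/b/c') on an empty map stores
    # {'a/b': 1, 'a': 1}; a following addpath('a/b/d') only bumps 'a/b'
    # to 2 and returns early, since the ancestors of an existing base are
    # already counted. delpath reverses this, deleting an entry once its
    # count would drop to zero.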
def __iter__(self):
return self._dirs.iterkeys()
def __contains__(self, d):
return d in self._dirs
def clear(self):
self._dirs.clear()
@property
def dirsdict(self):
return self._dirs
def makedirstate(cls):
class sqldirstate(cls):
def _sqlinit(self):
            '''Initialize the sqlite backing for this dirstate: open the
            db file through the opener, apply the pragmas and bind the
            maps to their sql-backed implementations.
            '''
self._sqlfilename = self._opener.join(DBFILE)
self._sqlconn = sqlite3.connect(self._sqlfilename)
self._sqlconn.text_factory = str
if self._ui.config('sqldirstate', 'tracefile', False):
self._sqlconn = tracewrapsqlconn(self._sqlconn,
self._ui.config('sqldirstate', 'tracefile'))
self._sqlconn.execute("PRAGMA cache_size = %d" % SQLITE_CACHE_SIZE)
self._sqlconn.execute("PRAGMA synchronous = OFF")
createotherschema(self._sqlconn)
self._sqlschemaversion = getversion(self._sqlconn)
self._map = sqldirstatemap(self._sqlconn)
self._dirs = sqldirs(self._sqlconn)
self._copymap = sqlcopymap(self._sqlconn)
self._filefoldmap = sqlfilefoldmap(self._sqlconn)
self._dirfoldmap = sqldirfoldmap(self._sqlconn)
self.skipbackups = self._ui.configbool('sqldirstate', 'skipbackups',
True)
if self._sqlschemaversion > SQL_SCHEMA_VERSION:
# TODO: add recovery mechanism
raise SQLSchemaVersionUnsupported(self._sqlschemaversion)
self._sqlmigration()
def _read(self):
pass
@propertycache
def _pl(self):
p1 = p2 = hex(nullid)
cur = self._sqlconn.cursor()
cur.execute('''SELECT key, value FROM other
WHERE key='p1' or key='p2' ''')
rows = cur.fetchall()
for r in rows:
if r[0] == 'p1':
p1 = r[1]
if r[0] == 'p2':
p2 = r[1]
cur.close()
return [bin(p1), bin(p2)]
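        # Note: the parents are persisted as hex strings under the keys
        # 'p1' and 'p2' in the 'other' table; the __setattr__ hook below
        # keeps those rows in sync whenever _pl is assigned.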
def __setattr__(self, key, value):
if key == '_pl':
# because other methods in dirstate are setting it directly
# instead of using setparents
p1 = value[0]
p2 = value[1]
cur = self._sqlconn.cursor()
cur.executemany('''INSERT OR REPLACE INTO
other (key, value) VALUES (?, ?)''',
[('p1', hex(p1)), ('p2', hex(p2))])
cur.close()
self.__dict__['_pl'] = value
else:
return super(sqldirstate, self).__setattr__(key, value)
def savebackup(self, tr, suffix="", prefix=""):
if self.skipbackups:
return
self._writesqldirstate()
util.copyfile(self._opener.join(DBFILE),
self._opener.join(prefix + DBFILE + suffix))
def restorebackup(self, tr, suffix="", prefix=""):
if self.skipbackups:
return
self._opener.rename(prefix + DBFILE + suffix, DBFILE)
self.invalidate()
def clearbackup(self, tr, suffix="", prefix=""):
if self.skipbackups:
return
self._opener.unlink(prefix + DBFILE + suffix)
@propertycache
def _nonnormalset(self):
return self._map.nonnormalentries()
def invalidate(self):
            # The transaction will be rolled back on the next open of the
            # file. Closing is faster in the case where we replace the
            # whole db file, as we don't need to roll back in that case.
self._sqlconn.close()
for a in ("_branch", "_pl", "_ignore", "_nonnormalset"):
if a in self.__dict__:
delattr(self, a)
self._lastnormaltime = 0
self._dirty = False
self._parentwriters = 0
self._sqlinit()
if util.safehasattr(self, '_fsmonitorstate'):
self._fsmonitorstate.invalidate()
def write(self, tr=False):
            # if dirty, dump to disk (db transaction commit)
if not self._dirty:
return
now = dirstate._getfsnow(self._opener)
self._map.resetnow(now, self._nonnormalset)
if tr:
tr.addfinalize("sqldirstate.write", self._backupandwrite)
return
self._writesqldirstate()
def _writedirstate(self, st):
self._writesqldirstate()
def _writesqldirstate(self):
# notify callbacks about parents change
if self._origpl is not None:
if self._origpl != self._pl:
for c, callback in sorted(self._plchangecallbacks.iteritems()):
callback(self, self._origpl, self._pl)
self._origpl = None
            # dump to disk (db transaction commit)
now = dirstate._getfsnow(self._opener)
self._map.resetnow(now, self._nonnormalset)
self._sqlconn.commit()
self._lastnormaltime = 0
self._dirty = self._dirtypl = False
if '_nonnormalset' in self.__dict__:
delattr(self, '_nonnormalset')
if self._ui.configbool('sqldirstate', 'fakedirstate', True):
writefakedirstate(self)
def _backupandwrite(self, tr):
if not self.skipbackups:
backuppath = self._opener.join('%s.%s' % (tr.journal, DBFILE))
util.copyfile(self._sqlfilename, backuppath)
tr._addbackupentry(('plain', self._sqlfilename,
backuppath, False))
self._writesqldirstate()
def clear(self):
self._map.clear()
if '_nonnormalset' in self.__dict__:
delattr(self, '_nonnormalset')
self._dirs.clear()
self._copymap.clear()
self._filefoldmap.clear()
self._dirfoldmap.clear()
self._pl = [nullid, nullid]
self._lastnormaltime = 0
self._dirty = True
def setparents(self, p1, p2=nullid):
"""Set dirstate parents to p1 and p2.
When moving from two parents to one, 'm' merged entries a
adjusted to normal and previous copy records discarded and
returned by the call.
See localrepo.setparents()
"""
if self._parentwriters == 0:
raise ValueError("cannot set dirstate parent without "
"calling dirstate.beginparentchange")
self._dirty = self._dirtypl = True
oldp2 = self._pl[1]
if self._origpl is None:
self._origpl = self._pl
self._pl = p1, p2
copies = {}
if oldp2 != nullid and p2 == nullid:
# Discard 'm' markers when moving away from a merge state
for f, s in self._map.modifiedentries():
if f in self._copymap:
copies[f] = self._copymap[f]
self.normallookup(f)
# Also fix up otherparent markers
for f, s in self._map.otherparententries():
if f in self._copymap:
copies[f] = self._copymap[f]
self.add(f)
return copies
def walk(self, match, subrepos, unknown, ignored, full=True):
self._map.enablelookupcache()
self._copymap.enablelookupcache()
return super(sqldirstate, self).walk(
match, subrepos, unknown, ignored, full)
def _sqlmigration(self):
if self._sqlschemaversion == 1:
cur = self._sqlconn.cursor()
cur.execute('''SELECT filename, status, mode, size, mtime
FROM files WHERE status != 'n' or mtime = -1''')
rows = cur.fetchall()
nonnormalfiles = dict()
for r in rows:
nonnormalfiles[r[0]] = self._map._rowtovalue(r[1:])
self._map.update(nonnormalfiles)
setversion(self._sqlconn, 2)
self._sqlschemaversion = 2
return sqldirstate
def writefakedirstate(dirstate):
st = dirstate._opener(FAKEDIRSTATE, "w", atomictemp=True)
st.write("".join(dirstate._pl))
st.write("\nThis is fake dirstate put here by the sqldirsate.")
st.write("\nIt contains only working copy parents info.")
st.write("\nThe real dirstate is in dirstate.sqlite3 file.")
st.close()
def tosql(dirstate):
"""" converts flat dirstate to sqldirstate
note: the sole responsibility of this function is to write the new dirstate
it's not touching anything but the DBFILE
"""
sqlfilename = dirstate._opener.join(DBFILE)
try:
os.unlink(sqlfilename)
except OSError:
pass
sqlconn = sqlite3.connect(sqlfilename)
sqlconn.text_factory = str
    # These two pragmas are dangerous and may corrupt the db if interrupted,
    # but we are populating the db now, so we are not afraid to lose it and
    # we want it to be fast.
sqlconn.execute("PRAGMA synchronous = OFF")
sqlconn.execute("PRAGMA journal_mode = OFF")
sqlconn.execute("PRAGMA cache_size = %d" % SQLITE_CACHE_SIZE)
createotherschema(sqlconn)
getversion(sqlconn)
sqlmap = sqldirstatemap(sqlconn)
copymap = sqlcopymap(sqlconn)
filefoldmap = sqlfilefoldmap(sqlconn)
dirfoldmap = sqldirfoldmap(sqlconn)
dirsdict = sqldirsdict(sqlconn)
sqlmap.update(dirstate._map)
copymap.update(dirstate._copymap)
filefoldmap.update(dirstate._filefoldmap)
dirfoldmap.update(dirstate._dirfoldmap)
dirs = sqldirs(sqlconn, filemap=dirstate._map, dirsdict={})
dirsdict.update(dirs.dirsdict)
cur = sqlconn.cursor()
cur.executemany('''INSERT OR REPLACE INTO
other (key, value)
VALUES (?, ?)''',
[('p1', hex(dirstate.p1())), ('p2', hex(dirstate.p2()))]
)
cur.close()
sqlconn.commit()
writefakedirstate(dirstate)
sqlconn.execute("PRAGMA synchronous = ON")
sqlconn.execute("PRAGMA journal_mode = DELETE")
def toflat(sqldirstate):
"""" converts sqldirstate to flat dirstate
note: the sole responsibility of this function is to write the new dirstate
it's not touching anything but the dirstate file
"""
# converts a sqldirstate to a flat one
st = sqldirstate._opener("dirstate", "w", atomictemp=True)
newmap = {}
for k, v in sqldirstate._map.iteritems():
newmap[k] = v
newcopymap = {}
for k, v in sqldirstate._copymap.iteritems():
newcopymap[k] = v
st.write(parsers.pack_dirstate(newmap, newcopymap, sqldirstate._pl,
dirstate._getfsnow(sqldirstate._opener)))
st.close()
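# Usage sketch (hypothetical; callers are expected to hold the repo lock,
# and the import path is an assumption):
#
#   from sqldirstate import tosql, toflat
#   tosql(repo.dirstate)   # one-time conversion to dirstate.sqlite3
#   # ... run with sqldirstate enabled ...
#   toflat(repo.dirstate)  # convert back to the flat dirstate format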