sapling/edenscm/hgext/infinitepush/sqlindexapi.py

407 lines
14 KiB
Python
Raw Normal View History

# Infinite push
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import logging
import os
import time
import warnings
import mysql.connector
from indexapi import indexapi, indexexception
from edenscm.mercurial import util
Preserve order of revisions after hg pullbackup Summary: This diff fixes hg pullbackup so it retrieves commits in the same order that they were pushed. This was caused by commits being stored in sets and dictionaries, which are unordered types. These have been replaced by OrderedDict to maintain the order. **Description of the task:** Infinitepush is a mercurial extension that allows sharing commits easily. Among other things it also backs up all local commits that were made on devservers and laptops. That means that we always have an up-to-date backup. You can read more about it here - https://our.intern.facebook.com/intern/dex/mercurial-workflow/infinitepush/ . Backup is triggered whenever a mercurial transaction is triggered, i.e. during commit, rebase or any other operation that changes the mercurial repo. `hg pullbackup` is the way to pull backed up commits. There is a problem. Let's say host A has this repo: ` o` ` |` ` o C1` ` | /` ` | C2` ` | /` ` o` So we have two draft commits: C1 and C2. C2 is probably an older commit. But if we restore it on another host we can get another output: ` o` ` |` ` o C2` ` | /` ` | C1` ` | /` ` o ` This happens because `hg sl` orders commits by their revision number in revlog - https://www.mercurial-scm.org/wiki/Revlog . The point of the task is to make sure commits are ordered by their creation date. Link to comment: http://www.facebook.com/groups/scm/permalink/1504906422892306/?comment_id=1505017576214524&reply_comment_id=1506426179406997 Reviewed By: StanislavGlebik Differential Revision: D6884670 fbshipit-source-id: 3281dbc1e25e24662a4b6ba78b96b85d5bae78c9
2018-02-06 20:57:21 +03:00
def _convertbookmarkpattern(pattern):
# To search for \, specify it as \\
# To search for _, specify it as \_
# To search for %, specify it as \%
pattern = pattern.replace("\\", "\\\\")
pattern = pattern.replace("_", "\\_")
pattern = pattern.replace("%", "\\%")
if pattern.endswith("*"):
pattern = pattern[:-1] + "%"
return pattern
# Number of seconds in one day; used to turn a threshold expressed in days
# into seconds before comparing it with a timestamp difference.
SEC_IN_DAY = 24 * 60 * 60
class sqlindexapi(indexapi):
    """SQL backend for the infinitepush index. See schema.sql.

    The connection is opened lazily: every public method calls
    ``sqlconnect()`` first if no connection has been established yet.
    The object can also be used as a context manager, in which case the
    pending work is committed when the ``with`` body succeeds and rolled
    back when it raises.
    """

    def __init__(
        self,
        reponame,
        host,
        port,
        database,
        user,
        password,
        logfile,
        loglevel,
        shorthasholdrevthreshold,
        waittimeout=600,
        locktimeout=120,
    ):
        """Store the connection settings; no connection is opened here.

        ``host``, ``port``, ``database``, ``user`` and ``password`` are passed
        to ``mysql.connector.connect`` by ``sqlconnect``. ``logfile`` falls
        back to ``os.devnull`` when empty. ``shorthasholdrevthreshold`` is a
        number of days (see ``getnodebyprefix``). ``waittimeout`` and
        ``locktimeout`` become the session's ``wait_timeout`` and
        ``innodb_lock_wait_timeout``.
        """
        super(sqlindexapi, self).__init__()
        self.reponame = reponame
        self.sqlargs = {
            "host": host,
            "port": port,
            "database": database,
            "user": user,
            "password": password,
        }
        self.sqlconn = None
        self.sqlcursor = None
        if not logfile:
            logfile = os.devnull
        # NOTE: this configures and re-levels the *root* logger.
        logging.basicConfig(filename=logfile)
        self.log = logging.getLogger()
        self.log.setLevel(loglevel)
        self._connected = False
        self._waittimeout = waittimeout
        self._locktimeout = locktimeout
        self.shorthasholdrevthreshold = shorthasholdrevthreshold

    def sqlconnect(self):
        """Open the MySQL connection and cursor and set session timeouts.

        Raises ``indexexception`` if a connection or cursor is already open.
        Connecting is attempted up to three times; the last
        ``mysql.connector`` error is re-raised if all attempts fail.
        """
        if self.sqlconn:
            raise indexexception("SQL connection already open")
        if self.sqlcursor:
            raise indexexception("SQL cursor already open without connection")
        retry = 3
        while True:
            try:
                self.sqlconn = mysql.connector.connect(force_ipv6=True, **self.sqlargs)

                # Code is copy-pasted from hgsql. Bug fixes need to be
                # back-ported!
                # The default behavior is to return byte arrays, when we
                # need strings. This custom convert returns strings.
                self.sqlconn.set_converter_class(CustomConverter)
                self.sqlconn.autocommit = False
                break
            except mysql.connector.errors.Error:
                # mysql can be flakey occasionally, so do some minimal
                # retrying.
                retry -= 1
                if retry == 0:
                    raise
                time.sleep(0.2)

        # Both timeouts are spliced into the statement text, so both go
        # through the converter's escaping (previously only wait_timeout did).
        escape = self.sqlconn.converter.escape
        waittimeout = escape("%s" % self._waittimeout)
        locktimeout = escape("%s" % self._locktimeout)

        self.sqlcursor = self.sqlconn.cursor()
        self.sqlcursor.execute("SET wait_timeout=%s" % waittimeout)
        self.sqlcursor.execute("SET innodb_lock_wait_timeout=%s" % locktimeout)
        self._connected = True

    def close(self):
        """Cleans up the metadata store connection.

        Safe to call when no connection was ever opened. Afterwards the
        object is back in its unconnected state, so the next operation
        reconnects instead of using a closed cursor.
        """
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                if self.sqlcursor is not None:
                    self.sqlcursor.close()
                if self.sqlconn is not None:
                    self.sqlconn.close()
        finally:
            self.sqlcursor = None
            self.sqlconn = None
            self._connected = False

    def __enter__(self):
        """Connect if necessary and return ``self``."""
        if not self._connected:
            self.sqlconnect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Commit on success, roll back if the ``with`` body raised.

        Returns ``None``, so an exception from the body keeps propagating.
        The connection itself is left open.
        """
        if exc_type is None:
            self.sqlconn.commit()
        else:
            self.sqlconn.rollback()

    def addbundle(self, bundleid, nodesctx):
        """Records bundles, mapping from node to bundle and metadata for nodes

        ``nodesctx`` is an iterable of changectx-like objects; each must
        provide ``hex()``, ``description()``, ``p1()``, ``p2()``, ``user()``,
        ``extra()`` and ``date()``.
        """
        if not self._connected:
            self.sqlconnect()

        # nodesctx is walked twice below; materialize it so that a one-shot
        # iterable does not come up empty on the second pass.
        nodesctx = list(nodesctx)

        # insert bundle
        self.log.info("ADD BUNDLE %r %r", self.reponame, bundleid)
        self.sqlcursor.execute(
            "INSERT INTO bundles(bundle, reponame) VALUES " "(%s, %s)",
            params=(bundleid, self.reponame),
        )

        # insert nodes to bundle mapping
        self.sqlcursor.executemany(
            "INSERT INTO nodestobundle(node, bundle, reponame) "
            "VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE "
            "bundle=VALUES(bundle)",
            [(ctx.hex(), bundleid, self.reponame) for ctx in nodesctx],
        )

        # insert metadata
        data = []
        for ctx in nodesctx:
            extra = ctx.extra()
            authordate = int(ctx.date()[0])
            data.append(
                (
                    ctx.hex(),  # node
                    ctx.description(),  # message
                    ctx.p1().hex(),  # p1
                    ctx.p2().hex(),  # p2
                    ctx.user(),  # author
                    extra.get("committer", ctx.user()),  # committer
                    authordate,  # author_date
                    int(extra.get("committer_date", authordate)),  # committer_date
                    self.reponame,  # reponame
                )
            )

        self.sqlcursor.executemany(
            "INSERT IGNORE INTO nodesmetadata(node, message, p1, p2, "
            "author, committer, author_date, committer_date, "
            "reponame) VALUES "
            "(%s, %s, %s, %s, %s, %s, %s, %s, %s)",
            data,
        )

    def addbookmark(self, bookmark, node):
        """Takes a bookmark name and hash, and records mapping in the metadata
        store. An existing bookmark of the same name is updated."""
        if not self._connected:
            self.sqlconnect()
        self.log.info(
            "ADD BOOKMARKS %r bookmark: %r node: %r", self.reponame, bookmark, node
        )
        self.sqlcursor.execute(
            "INSERT INTO bookmarkstonode(bookmark, node, reponame) "
            "VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE node=VALUES(node)",
            params=(bookmark, node, self.reponame),
        )

    def addmanybookmarks(self, bookmarks):
        """Records mapping of bookmarks and nodes

        ``bookmarks`` is a mapping from bookmark name to node.
        """
        if not self._connected:
            self.sqlconnect()

        # items() rather than the Python 2-only iteritems().
        data = [(bookmark, node, self.reponame) for bookmark, node in bookmarks.items()]

        self.sqlcursor.executemany(
            "INSERT INTO bookmarkstonode(bookmark, node, reponame) "
            "VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE node=VALUES(node)",
            data,
        )

    def deletebookmarks(self, patterns):
        """Accepts list of bookmark patterns and deletes them.

        Each pattern is converted with ``_convertbookmarkpattern`` and matched
        with ``LIKE`` within this repo. The statement is executed on the
        current cursor without committing. An empty ``patterns`` is a no-op.
        """
        if not self._connected:
            self.sqlconnect()

        # build and execute delete query
        self.log.info("DELETE BOOKMARKS: %s", patterns)
        patterns = [_convertbookmarkpattern(pattern) for pattern in patterns]
        if not patterns:
            # Without this guard the statement would end in "AND ()",
            # which is not valid SQL.
            return
        condition1 = "reponame = %s"
        condition2 = " OR ".join(("bookmark LIKE (%s)",) * len(patterns))
        query = "DELETE FROM bookmarkstonode WHERE (%s) AND (%s)" % (
            condition1,
            condition2,
        )
        self.sqlcursor.execute(query, params=[self.reponame] + patterns)

    def getbundle(self, node):
        """Returns the bundleid for the bundle that contains the given node.

        None if there is not exactly one matching row.
        """
        if not self._connected:
            self.sqlconnect()
        self.log.info("GET BUNDLE %r %r", self.reponame, node)
        self.sqlcursor.execute(
            "SELECT bundle from nodestobundle " "WHERE node = %s AND reponame = %s",
            params=(node, self.reponame),
        )
        result = self.sqlcursor.fetchall()
        if len(result) != 1 or len(result[0]) != 1:
            self.log.info("No matching node")
            return None
        bundle = result[0][0]
        self.log.info("Found bundle %r", bundle)
        return bundle

    @staticmethod
    def _formatsuggestions(rows):
        """Render (node, message, author, committer_date) rows as text for
        the error messages raised by ``getnodebyprefix``."""
        timeformat = r"%d %b %Y %H:%M"

        def gettitle(message):
            # First line of the commit message; an empty (or missing)
            # message gives an empty title instead of an IndexError.
            lines = message.splitlines() if message else []
            return lines[0] if lines else ""

        # format time from timestamp
        def formattime(timestamp):
            return time.strftime(timeformat, time.localtime(int(timestamp)))

        return "\n".join(
            " changeset: {c}\n"
            " author: {a}\n"
            " date: {d}\n"
            " summary: {m}\n".format(c=c, m=gettitle(m), a=a, d=formattime(d))
            for c, m, a, d in rows
        )

    def getnodebyprefix(self, prefix):
        """Returns the node with the given hash prefix.
        None if it doesn't exist.
        Raise error for ambiguous identifier

        For prefixes of 6 to 19 characters an ``indexexception`` is also
        raised when the single match is older than
        ``shorthasholdrevthreshold`` days.
        """
        if not self._connected:
            self.sqlconnect()
        self.log.info("GET NODE BY PREFIX %r %r", self.reponame, prefix)
        nodeprefixpattern = prefix + "%"
        result = None
        if 6 <= len(prefix) < 20:
            # With longer hashes we can make more complex QUERY
            # in order to return some suggestions with the matched PREFIX
            # so user can pick up the desired one easily
            # there is no need to go this path for prefixes longer than 20
            # because to find several commits is highly unlikely
            # Order suggestions by date to show the recent ones first
            cmd = (
                "SELECT t1.node, t2.message, t2.author, t2.committer_date "
                "FROM nodestobundle t1 JOIN nodesmetadata t2 "
                "ON t1.node = t2.node AND t1.reponame = t2.reponame "
                "WHERE t1.node LIKE %s AND t1.reponame = %s "
                "ORDER BY t2.committer_date DESC LIMIT 5"
            )
            params = (nodeprefixpattern, self.reponame)
            self.sqlcursor.execute(cmd, params)
            result = self.sqlcursor.fetchall()

            if len(result) > 1:
                raise indexexception(
                    ("ambiguous identifier '%s'\n" % prefix)
                    + "#commitcloud suggestions are:\n"
                    + self._formatsuggestions(result)
                )

            if len(result) == 1:
                revdate = result[0][3]
                threshold = self.shorthasholdrevthreshold * SEC_IN_DAY
                if time.time() - revdate > threshold:
                    raise indexexception(
                        "commit '%s' is more than %d days old\n"
                        "description:\n%s"
                        "#commitcloud hint: if you would like to fetch this "
                        "commit, please provide the full hash"
                        % (
                            prefix,
                            self.shorthasholdrevthreshold,
                            self._formatsuggestions(result),
                        )
                    )
        else:
            # LIMIT 2 is enough to tell "unique" from "ambiguous".
            self.sqlcursor.execute(
                "SELECT node from nodestobundle "
                "WHERE node LIKE %s "
                "AND reponame = %s "
                "LIMIT 2",
                params=(nodeprefixpattern, self.reponame),
            )
            result = self.sqlcursor.fetchall()

            if len(result) > 1:
                raise indexexception(
                    ("ambiguous identifier '%s'\n" % prefix)
                    + "suggestion: provide longer commithash prefix"
                )

        # result not found
        if len(result) != 1 or len(result[0]) == 0:
            self.log.info("No matching node")
            return None

        node = result[0][0]

        # Log found result. It is unique.
        self.log.info("Found node %r", node)
        return node

    def getnode(self, bookmark):
        """Returns the node for the given bookmark. None if it doesn't exist."""
        if not self._connected:
            self.sqlconnect()
        self.log.info("GET NODE reponame: %r bookmark: %r", self.reponame, bookmark)
        self.sqlcursor.execute(
            "SELECT node from bookmarkstonode WHERE " "bookmark = %s AND reponame = %s",
            params=(bookmark, self.reponame),
        )
        result = self.sqlcursor.fetchall()
        if len(result) != 1 or len(result[0]) != 1:
            self.log.info("No matching bookmark")
            return None
        node = result[0][0]
        self.log.info("Found node %r", node)
        return node

    def getbookmarks(self, query):
        """Returns an ordered mapping of bookmark name to node for every
        bookmark of this repo matching ``query``.

        ``query`` is converted with ``_convertbookmarkpattern`` and matched
        with ``LIKE``. Rows are returned in ascending ``time`` order and
        inserted into a ``util.sortdict`` in that order.
        """
        if not self._connected:
            self.sqlconnect()
        self.log.info("QUERY BOOKMARKS reponame: %r query: %r", self.reponame, query)
        query = _convertbookmarkpattern(query)
        self.sqlcursor.execute(
            "SELECT bookmark, node from bookmarkstonode WHERE "
            "reponame = %s AND bookmark LIKE %s "
            # Bookmarks have to be restored in the same order of creation
            # See T24417531
            "ORDER BY time ASC",
            params=(self.reponame, query),
        )
        result = self.sqlcursor.fetchall()
        bookmarks = util.sortdict()
        for row in result:
            if len(row) != 2:
                # The row is passed as a logging argument: "%s" % row would
                # itself fail for the malformed tuples reported here.
                self.log.info("Bad row returned: %s", row)
                continue
            bookmarks[row[0]] = row[1]
        return bookmarks

    def saveoptionaljsonmetadata(self, node, jsonmetadata):
        """Stores ``jsonmetadata`` in the ``optional_json_metadata`` column of
        the ``nodesmetadata`` row for ``node`` in this repo."""
        if not self._connected:
            self.sqlconnect()
        self.log.info(
            "INSERT METADATA, QUERY BOOKMARKS reponame: %r "
            "node: %r, jsonmetadata: %s",
            self.reponame,
            node,
            jsonmetadata,
        )

        self.sqlcursor.execute(
            "UPDATE nodesmetadata SET optional_json_metadata=%s WHERE "
            "reponame=%s AND node=%s",
            params=(jsonmetadata, self.reponame, node),
        )
class CustomConverter(mysql.connector.conversion.MySQLConverter):
    """Ensure that all values being returned are returned as python string
    (versus the default byte arrays)."""

    def _STRING_to_python(self, value, dsc=None):
        """Return ``value`` coerced with ``str()``; ``dsc`` is not used."""
        return str(value)

    # VAR_STRING and BLOB values get exactly the same treatment as STRING.
    _VAR_STRING_to_python = _STRING_to_python
    _BLOB_to_python = _STRING_to_python

    # localstr is Mercurial-specific. See encoding.py
    def _localstr_to_mysql(self, value):
        """Return a Mercurial ``localstr`` coerced with ``str()`` so it can
        be used as a query parameter."""
        return str(value)