"""Repack support: consolidating remotefilelog data and history pack files."""
from __future__ import absolute_import

import os
import time

from collections import defaultdict

from hgext3rd.extutil import runshellcommand

from mercurial import (
    error,
    extensions,
    mdiff,
    policy,
    util,
)
from mercurial.node import nullid
from mercurial.i18n import _

from . import (
    constants,
    contentstore,
    datapack,
    historypack,
    metadatastore,
    shallowutil,
)

osutil = policy.importmod(r'osutil')

def backgroundrepack(repo, incremental=True):
    """Launch 'hg repack' on the given repo as a background process."""
    cmd = util.hgcmd() + ['-R', repo.origroot, 'repack']
    msg = _("(running background repack)\n")
    if incremental:
        cmd.append('--incremental')
        msg = _("(running background incremental repack)\n")
    cmd = ' '.join(map(util.shellquote, cmd))
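    # For illustration, with incremental=True the quoted command built above
    # ends up looking roughly like this (the repo path is hypothetical):
    #   hg -R /path/to/repo repack --incremental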

    repo.ui.warn(msg)
    runshellcommand(cmd, os.environ)

def fullrepack(repo):
    if util.safehasattr(repo, 'shareddatastores'):
        datasource = contentstore.unioncontentstore(*repo.shareddatastores)
        historysource = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores,
            allowincomplete=True)

        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _runrepack(repo, datasource, historysource, packpath,
                   constants.FILEPACK_CATEGORY)

    if repo.ui.configbool('treemanifest', 'server'):
        treemfmod = extensions.find('treemanifest')
        treemfmod.serverrepack(repo)
    elif util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        datasource = contentstore.unioncontentstore(*sdstores)
        historysource = metadatastore.unionmetadatastore(
            *shstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, spackpath,
                   constants.TREEPACK_CATEGORY)

        # Repack the local manifest store
        datasource = contentstore.unioncontentstore(
            *ldstores,
            allowincomplete=True)
        historysource = metadatastore.unionmetadatastore(
            *lhstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, lpackpath,
                   constants.TREEPACK_CATEGORY)

def incrementalrepack(repo):
    """This repacks the repo by looking at the distribution of pack files in
    the repo and performing the most minimal repack to keep the repo in good
    shape.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _incrementalrepack(repo,
                           repo.shareddatastores,
                           repo.sharedhistorystores,
                           packpath,
                           constants.FILEPACK_CATEGORY)

    if repo.ui.configbool('treemanifest', 'server'):
        treemfmod = extensions.find('treemanifest')
        treemfmod.serverrepack(repo, incremental=True)
    elif util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        _incrementalrepack(repo,
                           sdstores,
                           shstores,
                           spackpath,
                           constants.TREEPACK_CATEGORY)

        # Repack the local manifest store
        _incrementalrepack(repo,
                           ldstores,
                           lhstores,
                           lpackpath,
                           constants.TREEPACK_CATEGORY,
                           allowincompletedata=True)

def _getmanifeststores(repo):
    shareddatastores = repo.manifestlog.shareddatastores
    localdatastores = repo.manifestlog.localdatastores
    sharedhistorystores = repo.manifestlog.sharedhistorystores
    localhistorystores = repo.manifestlog.localhistorystores

    sharedpackpath = shallowutil.getcachepackpath(repo,
                                                  constants.TREEPACK_CATEGORY)
    localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
                                                 constants.TREEPACK_CATEGORY)

    # The native stores don't support repacking yet, so fall back to the
    # python versions.
    if repo.ui.configbool("treemanifest", "usecunionstore"):
        usecdatapack = repo.ui.configbool("remotefilelog", "fastdatapack")
        shareddatastores = [datapack.datapackstore(repo.ui, sharedpackpath,
                                                   usecdatapack=usecdatapack)]
        localdatastores = [datapack.datapackstore(repo.ui, localpackpath,
                                                  usecdatapack=usecdatapack)]

    return ((localpackpath, localdatastores, localhistorystores),
            (sharedpackpath, shareddatastores, sharedhistorystores))

def _incrementalrepack(repo, datastore, historystore, packpath, category,
                       allowincompletedata=False):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    files = osutil.listdir(packpath, stat=True)

    datapacks = _computeincrementaldatapack(repo.ui, files)
    fullpaths = list(os.path.join(packpath, p) for p in datapacks)
    datapacks = list(datapack.datapack(p) for p in fullpaths)
    datapacks.extend(s for s in datastore
                     if not isinstance(s, datapack.datapackstore))

    historypacks = _computeincrementalhistorypack(repo.ui, files)
    fullpaths = list(os.path.join(packpath, p) for p in historypacks)
    historypacks = list(historypack.historypack(p) for p in fullpaths)
    historypacks.extend(s for s in historystore
                        if not isinstance(s, historypack.historypackstore))

    datasource = contentstore.unioncontentstore(
        *datapacks,
        allowincomplete=allowincompletedata)
    historysource = metadatastore.unionmetadatastore(*historypacks,
                                                     allowincomplete=True)
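    # The union stores assembled above combine the packs chosen for this
    # incremental pass with any loose (non-packstore) sources, so the
    # repacker can read every revision it needs while writing the new packs.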

    _runrepack(repo, datasource, historysource, packpath, category)

def _computeincrementaldatapack(ui, files):
    """Given a set of pack files and a set of generation size limits, this
    function computes the list of files that should be packed as part of an
    incremental repack.

    It tries to strike a balance between keeping incremental repacks cheap
    (i.e. packing small things when possible) and rolling the packs up into
    the big ones over time.
    """
    generations = ui.configlist("remotefilelog", "data.generations",
                                ['1GB', '100MB', '1MB'])
    generations = list(sorted((util.sizetoint(s) for s in generations),
                              reverse=True))
    generations.append(0)

    gencountlimit = ui.configint('remotefilelog', 'data.gencountlimit', 2)
    repacksizelimit = ui.configbytes('remotefilelog', 'data.repacksizelimit',
                                     '100MB')

    return _computeincrementalpack(ui, files, generations, datapack.PACKSUFFIX,
                                   datapack.INDEXSUFFIX, gencountlimit,
                                   repacksizelimit)
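
# The knobs read above (and their history.* counterparts in the next
# function) can be tuned from hgrc. A hypothetical configuration, shown here
# with the default values:
#
#   [remotefilelog]
#   data.generations = 1GB, 100MB, 1MB
#   data.gencountlimit = 2
#   data.repacksizelimit = 100MB
#   history.generations = 100MB
#   history.gencountlimit = 2
#   history.repacksizelimit = 100MB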

def _computeincrementalhistorypack(ui, files):
    generations = ui.configlist("remotefilelog", "history.generations",
                                ['100MB'])
    generations = list(sorted((util.sizetoint(s) for s in generations),
                              reverse=True))
    generations.append(0)

    gencountlimit = ui.configint('remotefilelog', 'history.gencountlimit', 2)
    repacksizelimit = ui.configbytes('remotefilelog', 'history.repacksizelimit',
                                     '100MB')

    return _computeincrementalpack(ui, files, generations,
                                   historypack.PACKSUFFIX,
                                   historypack.INDEXSUFFIX, gencountlimit,
                                   repacksizelimit)

def _computeincrementalpack(ui, files, limits, packsuffix, indexsuffix,
                            gencountlimit, repacksizelimit):
    """Pick the pack files to roll up in one incremental repack pass.

    Packs are bucketed into generations by size, then the cheapest useful
    set of packs is chosen for repacking.
    """
    # Group the packs by generation (i.e. by size)
    generations = []
    for i in xrange(len(limits)):
        generations.append([])
    sizes = {}
    fileset = set(fn for fn, mode, stat in files)
    for filename, mode, stat in files:
        if not filename.endswith(packsuffix):
            continue

        prefix = filename[:-len(packsuffix)]

        # Don't process a pack if it doesn't have an index.
        if (prefix + indexsuffix) not in fileset:
            continue

        size = stat.st_size
        sizes[prefix] = size
        for i, limit in enumerate(limits):
            if size > limit:
                generations[i].append(prefix)
                break
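    # A worked example, assuming the default data limits: with 0 appended,
    # limits is [1GB, 100MB, 1MB, 0] (descending), so a 50MB pack lands in
    # generations[2] (larger than 1MB, but not larger than 100MB) and a 2GB
    # pack lands in generations[0].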

    # Find the largest generation with more than gencountlimit packs and
    # repack it.
    for i, limit in enumerate(limits):
        if len(generations[i]) > gencountlimit:
            # Try to repack 3 things at once. This means if we run an
            # incremental repack right after we add a new pack file, we'll
            # still decrease the total number of pack files.
            count = 3
            if sum(sizes[n] for n in generations[i]) < repacksizelimit:
                count = len(generations[i])
            return sorted(generations[i], key=lambda x: sizes[x])[:count]

    # If no generation has more than gencountlimit packs, repack as many of
    # the smaller packs as fit into the size limit.
    small = set().union(*generations[1:])
    if len(small) > 1:
        total = 0
        packs = []
        for pack in sorted(small, key=lambda x: sizes[x]):
            size = sizes[pack]
            if total + size < repacksizelimit:
                packs.append(pack)
                total += size
            else:
                break

        if len(packs) > 1:
            return packs

    # If there aren't small ones to repack, repack the largest generation.
    if len(generations[0]) > 1:
        return generations[0]

    return []

def _runrepack(repo, data, history, packpath, category):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Check if the file node is older than the TTL limit.

        If no limit is specified in the config, the default limit is used.
        """
        filectx = repo.filectx(filename, fileid=node)
        filetime = repo[filectx.linkrev()].date()

        # The default TTL limit is 30 days
        defaultlimit = 60 * 60 * 24 * 30
        ttl = repo.ui.configint('remotefilelog', 'nodettl', defaultlimit)
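        # The TTL can be overridden from hgrc; a hypothetical example setting
        # it to one day (the value is in seconds):
        #
        #   [remotefilelog]
        #   nodettl = 86400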

        limit = time.time() - ttl
        return filetime[0] < limit

    packer = repacker(repo, data, history, category, isold)

    # internal config: remotefilelog.datapackversion
    dv = repo.ui.configint('remotefilelog', 'datapackversion', 0)

    with datapack.mutabledatapack(repo.ui, packpath, version=dv) as dpack:
        with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
            try:
                packer.run(dpack, hpack)
            except error.LockHeld:
                raise error.Abort(_("skipping repack - another repack is "
                                    "already running"))

class repacker(object):
    """Class for orchestrating the repack of data and history information into a
    new format.
    """
    def __init__(self, repo, data, history, category, isold=None):
        self.repo = repo
        self.data = data
        self.history = history
        self.unit = constants.getunits(category)
        self.garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
        if self.garbagecollect:
            if not isold:
                raise ValueError("Function 'isold' is not properly specified")
            self.keepkeys = self._gckeepset()
            self.isold = isold

    def _gckeepset(self):
        """Compute the keepset of (filename, node) keys that must not be
        garbage collected.
        """
        repo = self.repo
        revs = ['.', 'draft()', 'parents(draft())', '(heads(all()) & date(-7))']

        # If pullprefetch and bgprefetchrevs are specified, include them as
        # well, since we don't want to prefetch revisions and then
        # immediately garbage collect them
        prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
        if prefetchrevs:
            revs.append('(%s)' % prefetchrevs)
        prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
        if prefetchrevs:
            revs.append('(%s)' % prefetchrevs)
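        # For illustration, with a hypothetical pullprefetch value of
        # 'master', revs would now be ['.', 'draft()', 'parents(draft())',
        # '(heads(all()) & date(-7))', '(master)'], which is evaluated below
        # as a single '+'-joined revset.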

        keep = repo.revs('+'.join(revs))
        keepkeys = set()
        for r in keep:
            m = repo[r].manifest()
            keepkeys.update(m.iteritems())

        return keepkeys

    def run(self, targetdata, targethistory):
        ledger = repackledger()

        with self.repo._lock(self.repo.svfs, "repacklock", False, None,
                             None, _('repacking %s') % self.repo.origroot):
            self.repo.hook('prerepack')

            # Populate ledger from source
            self.data.markledger(ledger)
            self.history.markledger(ledger)

            # Run repack
            self.repackdata(ledger, targetdata)
            self.repackhistory(ledger, targethistory)

            # Call cleanup on each source
            for source in ledger.sources:
                source.cleanup(ledger)

    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ui.progress(_("repacking data"), count, unit=self.unit,
                        total=len(byfile))

            ancestors = {}
            nodes = list(node for node in entries.iterkeys())
            nohistory = []
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                ui.progress(_("building history"), i, unit='nodes',
                            total=len(nodes))
                try:
                    ancestors.update(self.history.getancestors(filename, node,
                                                               known=ancestors))
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            ui.progress(_("building history"), None)

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            orderednodes.extend(sorted(nohistory))

            # Compute deltas and write to the pack
            deltabases = defaultdict(lambda: (nullid, 0))
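            # deltabases maps node -> (deltabase, chainlength): the child
            # chosen as the node's delta base and the length of the delta
            # chain that choice produces. Nodes without a recorded base
            # default to (nullid, 0), i.e. they get stored as fulltext.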
            nodes = set(nodes)
            for i, node in enumerate(orderednodes):
                # orderednodes is all ancestors, but we only want to serialize
                # the files we have.
                if node not in nodes:
                    continue

                if self.garbagecollect:
                    # If the node is old and not in the keepset, skip it and
                    # mark it as garbage collected
                    if ((filename, node) not in self.keepkeys and
                            self.isold(self.repo, filename, node)):
                        entries[node].gced = True
                        continue

                ui.progress(_("processing nodes"), i, unit='nodes',
                            total=len(orderednodes))
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltabase, chainlen = deltabases[node]

                # Use available ancestor information to inform our delta
                # choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where
                    # the file was copied from elsewhere. So don't attempt to
                    # do any deltas with the other file.
                    if copyfrom:
                        p1 = nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non-optimal, since the parents may have
                        # many children, and this will only choose the last
                        # one.
                        # TODO: record all children and try all deltas to find
                        # the best
                        if p1 != nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != nullid:
                            deltabases[p2] = (node, chainlen + 1)

                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different versions of the file, we may
                # be fetching the same deltachain over and over again.
                # TODO: reuse existing deltas if they match our deltabase
                if deltabase != nullid:
                    deltabasetext = self.data.get(filename, deltabase)
                    original = self.data.get(filename, node)
                    delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                # TODO: don't use the delta if the chain is already long
                meta = self.data.getmeta(filename, node)
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            ui.progress(_("processing nodes"), None)
            count += 1

        ui.progress(_("repacking data"), None)
        target.close(ledger=ledger)

    def repackhistory(self, ledger, target):
        ui = self.repo.ui

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.historysource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ancestors = {}
            nodes = list(node for node in entries.iterkeys())

            for node in nodes:
                if node in ancestors:
                    continue
                ancestors.update(self.history.getancestors(filename, node,
                                                           known=ancestors))

            # Order the nodes children first
            orderednodes = reversed(self._toposort(ancestors))

            # Write to the pack
            dontprocess = set()
            for node in orderednodes:
                p1, p2, linknode, copyfrom = ancestors[node]

                # If the node is marked dontprocess, but it's also in the
                # explicit entries set, that means the node exists both in
                # this file and in another file that was copied to this file.
                # Usually this happens if the file was copied to another
                # file, then the copy was deleted, then reintroduced without
                # copy metadata. The original add and the new add have the
                # same hash since the content is identical and the parents
                # are null.
                if node in dontprocess and node not in entries:
                    # If copyfrom == filename, it means the copy history
                    # went to some other file, then came back to this one,
                    # so we should continue processing it.
                    if p1 != nullid and copyfrom != filename:
                        dontprocess.add(p1)
                    if p2 != nullid:
                        dontprocess.add(p2)
                    continue

                if copyfrom:
                    dontprocess.add(p1)

                target.add(filename, node, p1, p2, linknode, copyfrom)

                if node in entries:
                    entries[node].historyrepacked = True

            count += 1
            ui.progress(_("repacking history"), count, unit=self.unit,
                        total=len(byfile))

        ui.progress(_("repacking history"), None)
        target.close(ledger=ledger)

    def _toposort(self, ancestors):
        """Return the nodes in topological order, parents before children."""
        def parentfunc(node):
            p1, p2, linknode, copyfrom = ancestors[node]
            parents = []
            if p1 != nullid:
                parents.append(p1)
            if p2 != nullid:
                parents.append(p2)
            return parents

        sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
        return sortednodes

class repackledger(object):
    """Storage for all the bookkeeping that happens during a repack. It contains
    the list of revisions being repacked, what happened to each revision, and
    which source store contained which revision originally (for later cleanup).
    """
    def __init__(self):
        self.entries = {}
        self.sources = {}
        self.created = set()

    def markdataentry(self, source, filename, node):
        """Mark the given filename+node revision as having a data rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def markhistoryentry(self, source, filename, node):
        """Mark the given filename+node revision as having a history rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def _getorcreateentry(self, filename, node):
        key = (filename, node)
        value = self.entries.get(key)
        if not value:
            value = repackentry(filename, node)
            self.entries[key] = value

        return value

    def addcreated(self, value):
        self.created.add(value)

class repackentry(object):
    """Simple class representing a single revision entry in the repackledger.
    """
    __slots__ = ['filename', 'node', 'datasource', 'historysource',
                 'datarepacked', 'historyrepacked', 'gced']

    def __init__(self, filename, node):
        self.filename = filename
        self.node = node
        # If the revision has a data entry in the source
        self.datasource = False
        # If the revision has a history entry in the source
        self.historysource = False
        # If the revision's data entry was repacked into the repack target
        self.datarepacked = False
        # If the revision's history entry was repacked into the repack target
        self.historyrepacked = False
        # If garbage collected
        self.gced = False