sapling/mercurial/phases.py

492 lines
17 KiB
Python
Raw Normal View History

""" Mercurial phases support code
---
Copyright 2011 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
Logilab SA <contact@logilab.fr>
Augie Fackler <durin42@gmail.com>
This software may be used and distributed according to the terms
of the GNU General Public License version 2 or any later version.
---
This module implements most phase logic in mercurial.
Basic Concept
=============
2012-05-13 14:06:12 +04:00
A 'changeset phase' is an indicator that tells us how a changeset is
manipulated and communicated. The details of each phase are described
below; here we describe the properties they have in common.
Like bookmarks, phases are not stored in history and thus are not
permanent and leave no audit trail.
First, no changeset can be in two phases at once. Phases are ordered,
so they can be considered from lowest to highest. The default, lowest
phase is 'public' - this is the normal phase of existing changesets. A
child changeset can not be in a lower phase than its parents.
These phases share a hierarchy of traits:
immutable shared
public: X X
draft: X
secret:
2012-05-13 14:06:12 +04:00
Local commits are draft by default.
2012-05-13 14:06:12 +04:00
Phase Movement and Exchange
===========================
Phase data is exchanged by pushkey on pull and push. Some servers have
a publish option set, we call such a server a "publishing server".
Pushing a draft changeset to a publishing server changes the phase to
public.
A small list of facts/rules defines the exchange of phases:
* old client never changes server states
* pull never changes server states
2012-05-13 14:06:12 +04:00
* publish and old server changesets are seen as public by client
* any secret changeset seen in another repository is lowered to at
least draft
Here is the final table summing up the 49 possible use cases of phase
exchange:
server
old publish non-publish
N X N D P N D P
old client
pull
N - X/X - X/D X/P - X/D X/P
X - X/X - X/D X/P - X/D X/P
push
X X/X X/X X/P X/P X/P X/D X/D X/P
new client
pull
N - P/X - P/D P/P - D/D P/P
D - P/X - P/D P/P - D/D P/P
P - P/X - P/D P/P - P/D P/P
push
D P/X P/X P/P P/P P/P D/D D/D P/P
P P/X P/X P/P P/P P/P P/P P/P P/P
Legend:
A/B = final state on client / state on server
* N = new/not present,
* P = public,
* D = draft,
* X = not tracked (i.e., the old client or server has no internal
way of recording the phase.)
passive = only pushes
A cell here can be read like this:
"When a new client pushes a draft changeset (D) to a publishing
server where it's not present (N), it's marked public on both
sides (P/P)."
2012-05-13 14:06:12 +04:00
Note: old clients behave as a publishing server with draft-only content
- other people see it as public
- content is pushed as draft
"""
2011-11-04 02:49:14 +04:00
2015-08-09 05:46:33 +03:00
from __future__ import absolute_import
import errno
2015-08-09 05:46:33 +03:00
from .i18n import _
from .node import (
bin,
hex,
nullid,
nullrev,
short,
)
from . import (
error,
smartset,
txnutil,
2015-08-09 05:46:33 +03:00
)
# Phase numbers: public is 0, draft is 1, secret is 2. ``allphases`` holds
# all three in that order, which is also the order used to index phase roots.
allphases = public, draft, secret = range(3)
2011-11-04 02:49:14 +04:00
# Every phase except public (the first entry of allphases); these are the
# phases whose roots are walked when phase data is computed.
trackedphases = allphases[1:]
# Phase names, indexed by phase number.
phasenames = ['public', 'draft', 'secret']
def _readroots(repo, phasedefaults=None):
    """Load the phase roots of ``repo`` from its 'phaseroots' file.

    The result is a list with one set of binary node ids per phase,
    indexed by phase number. Each line of the file is expected to hold
    a phase number and a hex node id separated by whitespace.

    When the file does not exist, every callable in ``phasedefaults``
    (if any) is invoked as ``fn(repo, roots)`` and its return value
    becomes the new roots. This lets a repository that predates phases
    start with something other than "everything is public".

    Returns ``(roots, dirty)``; ``dirty`` is True when the returned roots
    do not match what is on disk (i.e. the file was missing).
    """
    repo = repo.unfiltered()
    roots = [set() for phasenum in allphases]
    dirty = False
    try:
        fp, pending = txnutil.trypending(repo.root, repo.svfs, 'phaseroots')
        try:
            for entry in fp:
                phasestr, nodehex = entry.split()
                roots[int(phasestr)].add(bin(nodehex))
        finally:
            fp.close()
    except IOError as inst:
        # Only a missing file is expected; anything else is a real error.
        if inst.errno != errno.ENOENT:
            raise
        for initializer in phasedefaults or ():
            roots = initializer(repo, roots)
        dirty = True
    return roots, dirty
class phasecache(object):
    """In-memory copy of a repository's phase roots plus derived caches.

    Instance attributes:

    - ``phaseroots``: list indexed by phase number; each entry is the set
      of binary node ids that are roots of that phase
    - ``dirty``: True when ``phaseroots`` differs from what is on disk
    - ``opener``: the store vfs (``repo.svfs``) used to write 'phaseroots'
    - ``_phaserevs``: sequence giving the phase of each revision, or None
      while it has not been computed
    - ``_phasesets``: per-phase sets of revisions produced by the native
      computation, or None when unavailable

    The repository itself is never stored on the instance; methods that
    need it take it as an argument.
    """

    def __init__(self, repo, phasedefaults, _load=True):
        """Read the phase roots of ``repo``.

        ``phasedefaults`` is forwarded to _readroots(). When ``_load`` is
        false no attribute is initialized at all: copy() uses this to get
        an empty shell that it fills in by hand.
        """
        if _load:
            # Cheap trick to allow shallow-copy without copy module
            self.phaseroots, self.dirty = _readroots(repo, phasedefaults)
            self._phaserevs = None
            self._phasesets = None
            self.filterunknown(repo)
            self.opener = repo.svfs

    def getrevset(self, repo, phases):
        """Return a smartset of the revisions whose phase is in ``phases``.

        ``phases`` must be an indexable sequence of phase numbers.
        """
        self.loadphaserevs(repo) # ensure phase's sets are loaded
        if self._phasesets and all(self._phasesets[p] is not None
                                   for p in phases):
            # fast path - use _phasesets
            revs = self._phasesets[phases[0]]
            if len(phases) > 1:
                revs = revs.copy() # only copy when needed
                for p in phases[1:]:
                    revs.update(self._phasesets[p])
            if repo.changelog.filteredrevs:
                revs = revs - repo.changelog.filteredrevs
            return smartset.baseset(revs)
        else:
            # slow path - enumerate all revisions
            phase = self.phase
            revs = (r for r in repo if phase(repo, r) in phases)
            return smartset.generatorset(revs, iterasc=True)

    def copy(self):
        """Return a shallow copy of this cache.

        Only the ``phaseroots`` list itself is duplicated; the sets it
        holds and the derived caches are shared with the original.
        """
        # Shallow copy meant to ensure isolation in
        # advance/retractboundary(), nothing more.
        ph = self.__class__(None, None, _load=False)
        ph.phaseroots = self.phaseroots[:]
        ph.dirty = self.dirty
        ph.opener = self.opener
        ph._phaserevs = self._phaserevs
        ph._phasesets = self._phasesets
        return ph

    def replace(self, phcache):
        """replace all values in 'self' with content of phcache"""
        for a in ('phaseroots', 'dirty', 'opener', '_phaserevs', '_phasesets'):
            setattr(self, a, getattr(phcache, a))

    def _getphaserevsnative(self, repo):
        """Compute phase data through ``changelog.computephases()``.

        The result is returned untouched; loadphaserevs() unpacks it as
        ``(phaserevs, phasesets)`` and falls back to the pure Python
        computation if an AttributeError escapes from here.
        """
        repo = repo.unfiltered()
        # Hand over real lists: on Python 3 a bare map() is a one-shot
        # iterator, and _computephaserevspure() materializes its roots the
        # same way.
        nativeroots = [list(map(repo.changelog.rev, self.phaseroots[phase]))
                       for phase in trackedphases]
        return repo.changelog.computephases(nativeroots)

    def _computephaserevspure(self, repo):
        """Fill ``_phaserevs`` using only Python code.

        Every revision starts out public; then, for each tracked phase in
        increasing order, the phase roots and all their descendants are
        stamped with that phase, so higher phases overwrite lower ones.
        """
        repo = repo.unfiltered()
        revs = [public] * len(repo.changelog)
        self._phaserevs = revs
        self._populatephaseroots(repo)
        for phase in trackedphases:
            roots = list(map(repo.changelog.rev, self.phaseroots[phase]))
            if roots:
                for rev in roots:
                    revs[rev] = phase
                for rev in repo.changelog.descendants(roots):
                    revs[rev] = phase

    def loadphaserevs(self, repo):
        """ensure phase information is loaded in the object"""
        if self._phaserevs is None:
            try:
                res = self._getphaserevsnative(repo)
                self._phaserevs, self._phasesets = res
            except AttributeError:
                # no native implementation available: compute in Python
                self._computephaserevspure(repo)

    def invalidate(self):
        """Drop the derived caches; they are rebuilt on next use."""
        self._phaserevs = None
        self._phasesets = None

    def _populatephaseroots(self, repo):
        """Fills the _phaserevs cache with phases for the roots.
        """
        cl = repo.changelog
        phaserevs = self._phaserevs
        for phase in trackedphases:
            roots = map(cl.rev, self.phaseroots[phase])
            for root in roots:
                phaserevs[root] = phase

    def phase(self, repo, rev):
        """Return the phase number of revision ``rev``.

        nullrev is always public. Raises ValueError for any revision
        number below nullrev.
        """
        # We need a repo argument here to be able to build _phaserevs
        # if necessary. The repository instance is not stored in
        # phasecache to avoid reference cycles. The changelog instance
        # is not stored because it is a filecache() property and can
        # be replaced without us being notified.
        if rev == nullrev:
            return public
        if rev < nullrev:
            raise ValueError(_('cannot lookup negative revision'))
        if self._phaserevs is None or rev >= len(self._phaserevs):
            # cache missing or too short to cover ``rev``: rebuild it
            self.invalidate()
            self.loadphaserevs(repo)
        return self._phaserevs[rev]

    def write(self):
        """Write the phase roots to 'phaseroots' if they are dirty."""
        if not self.dirty:
            return
        f = self.opener('phaseroots', 'w', atomictemp=True, checkambig=True)
        try:
            self._write(f)
        finally:
            f.close()

    def _write(self, fp):
        """Serialize the roots to ``fp`` and clear the dirty flag.

        One line is written per root: the phase number, a space, and the
        hex node id.
        """
        for phase, roots in enumerate(self.phaseroots):
            for h in roots:
                fp.write('%i %s\n' % (phase, hex(h)))
        self.dirty = False

    def _updateroots(self, phase, newroots, tr):
        """Install ``newroots`` as the roots of ``phase``.

        The derived caches are invalidated, the cache is marked dirty and
        the transaction ``tr`` is told to regenerate 'phaseroots' and to
        flag that phases moved.
        """
        self.phaseroots[phase] = newroots
        self.invalidate()
        self.dirty = True
        tr.addfilegenerator('phase', ('phaseroots',), self._write)
        tr.hookargs['phases_moved'] = '1'

    def advanceboundary(self, repo, tr, targetphase, nodes):
        """Move ``nodes`` to ``targetphase`` if they are in a higher phase.

        Roots of every phase above ``targetphase`` are recomputed so that
        they no longer cover ``nodes``. Roots dropped along the way are
        re-declared in ``targetphase`` unless that phase is public.
        """
        # Be careful to preserve shallow-copied values: do not update
        # phaseroots values, replace them.
        repo = repo.unfiltered()
        delroots = [] # set of root deleted by this path
        for phase in range(targetphase + 1, len(allphases)):
            # filter nodes that are not in a compatible phase already
            nodes = [n for n in nodes
                     if self.phase(repo, repo[n].rev()) >= phase]
            if not nodes:
                break # no roots to move anymore
            olds = self.phaseroots[phase]
            roots = set(ctx.node() for ctx in repo.set(
                'roots((%ln::) - (%ln::%ln))', olds, olds, nodes))
            if olds != roots:
                self._updateroots(phase, roots, tr)
                # some roots may need to be declared for lower phases
                delroots.extend(olds - roots)
        # declare deleted root in the target phase
        if targetphase != public:
            self.retractboundary(repo, tr, targetphase, delroots)
        repo.invalidatevolatilesets()

    def retractboundary(self, repo, tr, targetphase, nodes):
        """Move ``nodes`` to ``targetphase`` if they are in a lower phase.

        Raises error.Abort when the null revision is among the nodes that
        would have to change phase.
        """
        # Be careful to preserve shallow-copied values: do not update
        # phaseroots values, replace them.
        repo = repo.unfiltered()
        currentroots = self.phaseroots[targetphase]
        newroots = [n for n in nodes
                    if self.phase(repo, repo[n].rev()) < targetphase]
        if newroots:
            if nullid in newroots:
                raise error.Abort(_('cannot change null revision phase'))
            currentroots = currentroots.copy()
            currentroots.update(newroots)

            # Only compute new roots for revs above the roots that are being
            # retracted.
            minnewroot = min(repo[n].rev() for n in newroots)
            aboveroots = [n for n in currentroots
                          if repo[n].rev() >= minnewroot]
            updatedroots = repo.set('roots(%ln::)', aboveroots)

            finalroots = set(n for n in currentroots if repo[n].rev() <
                             minnewroot)
            finalroots.update(ctx.node() for ctx in updatedroots)

            self._updateroots(targetphase, finalroots, tr)
        repo.invalidatevolatilesets()

    def filterunknown(self, repo):
        """remove unknown nodes from the phase boundary

        Nothing is lost as unknown nodes only hold data for their descendants.
        """
        filtered = False
        nodemap = repo.changelog.nodemap # to filter unknown nodes
        for phase, nodes in enumerate(self.phaseroots):
            missing = sorted(node for node in nodes if node not in nodemap)
            if missing:
                for mnode in missing:
                    repo.ui.debug(
                        'removing unknown node %s from %i-phase boundary\n'
                        % (short(mnode), phase))
                nodes.symmetric_difference_update(missing)
                filtered = True
        if filtered:
            self.dirty = True
        # filterunknown is called by repo.destroyed, we may have no changes in
        # root but phaserevs contents is certainly invalid (or at least we
        # have no proper way to check that). related to issue 3858.
        #
        # The other caller is __init__, which has no _phaserevs initialized
        # anyway. If this changes we should consider adding a dedicated
        # "destroyed" function to phasecache or a proper cache key mechanism
        # (see branchmap one)
        self.invalidate()
def advanceboundary(repo, tr, targetphase, nodes):
    """Move ``nodes`` (and whatever else must follow) into ``targetphase``.

    The boundary moves *forward*: afterwards every node is either in the
    target phase or still in a *lower* one.

    The work is done on a copy of the repository's phase cache, which is
    swapped in only once the operation has completed.
    """
    scratch = repo._phasecache.copy()
    scratch.advanceboundary(repo, tr, targetphase, nodes)
    repo._phasecache.replace(scratch)
def retractboundary(repo, tr, targetphase, nodes):
    """Push ``nodes`` (and whatever else must follow) back to ``targetphase``.

    The boundary moves *backward*: afterwards every node is either in the
    target phase or still in a *higher* one.

    The work is done on a copy of the repository's phase cache, which is
    swapped in only once the operation has completed.
    """
    scratch = repo._phasecache.copy()
    scratch.retractboundary(repo, tr, targetphase, nodes)
    repo._phasecache.replace(scratch)
2011-12-15 14:24:26 +04:00
def listphases(repo):
    """Return the draft phase roots as a dict suitable for pushkey.

    Each draft root contributes one entry mapping its hex node id to the
    draft phase number rendered as a string. A publishing repository
    additionally gets a ``'publishing'`` key set to ``'True'``.
    """
    draftvalue = '%i' % draft
    keys = dict((hex(root), draftvalue)
                for root in repo._phasecache.phaseroots[draft])
    if repo.publishing():
        # Tell the remote that we are a publishing repo. A publishing
        # repo cannot simply pretend to be an old repo: when pushing to
        # it, the client still needs to push the phase boundary.
        #
        # A push does not only transfer changesets, it also transfers
        # phase data, and new phase data may concern common changesets
        # that are not pushed (precisely because they are common).
        # A very simple example:
        #
        # 1) repo A pushes changeset X as draft to repo B
        # 2) repo B makes changeset X public
        # 3) repo B pushes to repo A. X is not pushed, but the fact that
        #    X is now public should be
        #
        # The server cannot work this out on its own as it knows nothing
        # about the client's phase data.
        keys['publishing'] = 'True'
    return keys
def pushphase(repo, nhex, oldphasestr, newphasestr):
    """Apply a phase change requested over pushkey to node ``nhex``.

    ``oldphasestr`` and ``newphasestr`` are the phase the sender believes
    the node is in and the phase it wants, both as decimal strings.

    Returns 1 when the node ends up in the requested phase (either because
    the boundary was advanced here, or because it already was there) and
    0 otherwise. The whole operation runs under the repository lock.
    """
    repo = repo.unfiltered()
    with repo.lock():
        current = repo[nhex].phase()
        # abs(): let's avoid negative index surprise
        wanted = abs(int(newphasestr))
        expected = abs(int(oldphasestr))
        if current == expected and wanted < expected:
            with repo.transaction('pushkey-phase') as tr:
                advanceboundary(repo, tr, wanted, [bin(nhex)])
            return 1
        # If the node is already in the wanted phase we raced with someone
        # else but still got the correct result; anything else is a failure.
        return 1 if current == wanted else 0
def analyzeremotephases(repo, subset, roots):
    """Compute phases heads and root in a subset of node from root dict

    * subset is heads of the subset
    * roots is {<nodeid> => phase} mapping. key and value are string.

    Accept unknown element input

    Returns a ``(publicheads, draftroots)`` pair: ``draftroots`` lists the
    remote draft roots that are known locally, and ``publicheads`` is the
    result of newheads() for ``subset`` minus those roots.
    """
    repo = repo.unfiltered()
    # build list from dictionary
    draftroots = []
    nodemap = repo.changelog.nodemap # to filter unknown nodes
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for nhex, phase in roots.items():
        if nhex == 'publishing': # ignore data related to publish option
            continue
        node = bin(nhex)
        phase = int(phase)
        if phase == public:
            # the only public root a remote may legitimately list is null
            if node != nullid:
                repo.ui.warn(_('ignoring inconsistent public root'
                               ' from remote: %s\n') % nhex)
        elif phase == draft:
            if node in nodemap:
                draftroots.append(node)
        else:
            repo.ui.warn(_('ignoring unexpected root from remote: %i %s\n')
                         % (phase, nhex))
    # compute heads
    publicheads = newheads(repo, subset, draftroots)
    return publicheads, draftroots
def newheads(repo, heads, roots):
    """Return the head nodes of one subset once another is subtracted.

    * ``heads`` defines the first subset
    * ``roots`` defines the second one, which is subtracted from the first

    The computation is done on the unfiltered repository and the result
    is a list of nodes.
    """
    unfi = repo.unfiltered()
    spec = 'heads((%ln + parents(%ln)) - (%ln::%ln))'
    return [ctx.node() for ctx in unfi.set(spec, heads, roots, roots, heads)]
def newcommitphase(ui):
    """helper to get the target phase of new commit

    Handle all possible values for the phases.new-commit options: either
    a phase name or a phase number. The default is draft.

    Returns the phase number. Raises error.ConfigError when the value is
    neither a known phase name nor the number of an existing phase.
    """
    v = ui.config('phases', 'new-commit', draft)
    try:
        return phasenames.index(v)
    except ValueError:
        pass
    try:
        phase = int(v)
    except ValueError:
        phase = None
    # A number is only acceptable if it designates an existing phase;
    # anything else (e.g. 7 or -1) would later be used as a bogus index
    # into the phase roots.
    if phase is None or phase not in allphases:
        msg = _("phases.new-commit: not a valid phase name ('%s')")
        raise error.ConfigError(msg % v)
    return phase
def hassecret(repo):
    """Return True if the repository has at least one secret phase root."""
    # Index with the named phase constant instead of a bare 2, as
    # listphases() does with ``draft``.
    return bool(repo._phasecache.phaseroots[secret])