sapling/mercurial/hbisect.py
Yann E. MORIN bd0d47a8d1 revset.bisect: add 'ignored' set to the bisect keyword
The 'ignored' changesets are outside the bisection range, but are
changesets that may have an impact on the outcome of the bisection.

For example, in case there's a merge between the good and bad csets,
but the branch-point is out of the bisection range, and the issue
originates from this branch, the branch will not be visited by bisect
and bisect will find that the culprit cset is the merge.

So, the 'ignored' set is equivalent to:
    (   ( ::bisect(bad) - ::bisect(good) )
      | ( ::bisect(good) - ::bisect(bad) ) )
    - bisect(range)

 - all ancestors of bad csets that are not ancestors of good csets, or
 - all ancestors of good csets that are not ancestors of bad csets
 - but that are not in the bisection range.

Signed-off-by: "Yann E. MORIN" <yann.morin.1998@anciens.enib.fr>
2011-09-20 20:21:04 +02:00

214 lines
7.6 KiB
Python

# changelog bisection for mercurial
#
# Copyright 2007 Matt Mackall
# Copyright 2005, 2006 Benoit Boissinot <benoit.boissinot@ens-lyon.org>
#
# Inspired by git bisect, extension skeleton taken from mq.py.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import os, error
from i18n import _
from node import short, hex
import util
def bisect(changelog, state):
"""find the next node (if any) for testing during a bisect search.
returns a (nodes, number, good) tuple.
'nodes' is the final result of the bisect if 'number' is 0.
Otherwise 'number' indicates the remaining possible candidates for
the search and 'nodes' contains the next bisect target.
'good' is True if bisect is searching for a first good changeset, False
if searching for a first bad one.
"""
clparents = changelog.parentrevs
skip = set([changelog.rev(n) for n in state['skip']])
def buildancestors(bad, good):
# only the earliest bad revision matters
badrev = min([changelog.rev(n) for n in bad])
goodrevs = [changelog.rev(n) for n in good]
goodrev = min(goodrevs)
# build visit array
ancestors = [None] * (len(changelog) + 1) # an extra for [-1]
# set nodes descended from goodrevs
for rev in goodrevs:
ancestors[rev] = []
for rev in xrange(goodrev + 1, len(changelog)):
for prev in clparents(rev):
if ancestors[prev] == []:
ancestors[rev] = []
# clear good revs from array
for rev in goodrevs:
ancestors[rev] = None
for rev in xrange(len(changelog), goodrev, -1):
if ancestors[rev] is None:
for prev in clparents(rev):
ancestors[prev] = None
if ancestors[badrev] is None:
return badrev, None
return badrev, ancestors
good = False
badrev, ancestors = buildancestors(state['bad'], state['good'])
if not ancestors: # looking for bad to good transition?
good = True
badrev, ancestors = buildancestors(state['good'], state['bad'])
bad = changelog.node(badrev)
if not ancestors: # now we're confused
if len(state['bad']) == 1 and len(state['good']) == 1:
raise util.Abort(_("starting revisions are not directly related"))
raise util.Abort(_("inconsistent state, %s:%s is good and bad")
% (badrev, short(bad)))
# build children dict
children = {}
visit = [badrev]
candidates = []
while visit:
rev = visit.pop(0)
if ancestors[rev] == []:
candidates.append(rev)
for prev in clparents(rev):
if prev != -1:
if prev in children:
children[prev].append(rev)
else:
children[prev] = [rev]
visit.append(prev)
candidates.sort()
# have we narrowed it down to one entry?
# or have all other possible candidates besides 'bad' have been skipped?
tot = len(candidates)
unskipped = [c for c in candidates if (c not in skip) and (c != badrev)]
if tot == 1 or not unskipped:
return ([changelog.node(rev) for rev in candidates], 0, good)
perfect = tot // 2
# find the best node to test
best_rev = None
best_len = -1
poison = set()
for rev in candidates:
if rev in poison:
# poison children
poison.update(children.get(rev, []))
continue
a = ancestors[rev] or [rev]
ancestors[rev] = None
x = len(a) # number of ancestors
y = tot - x # number of non-ancestors
value = min(x, y) # how good is this test?
if value > best_len and rev not in skip:
best_len = value
best_rev = rev
if value == perfect: # found a perfect candidate? quit early
break
if y < perfect and rev not in skip: # all downhill from here?
# poison children
poison.update(children.get(rev, []))
continue
for c in children.get(rev, []):
if ancestors[c]:
ancestors[c] = list(set(ancestors[c] + a))
else:
ancestors[c] = a + [c]
assert best_rev is not None
best_node = changelog.node(best_rev)
return ([best_node], tot, good)
def load_state(repo):
state = {'good': [], 'bad': [], 'skip': []}
if os.path.exists(repo.join("bisect.state")):
for l in repo.opener("bisect.state"):
kind, node = l[:-1].split()
node = repo.lookup(node)
if kind not in state:
raise util.Abort(_("unknown bisect kind %s") % kind)
state[kind].append(node)
return state
def save_state(repo, state):
f = repo.opener("bisect.state", "w", atomictemp=True)
wlock = repo.wlock()
try:
for kind in state:
for node in state[kind]:
f.write("%s %s\n" % (kind, hex(node)))
f.close()
finally:
wlock.release()
def get(repo, status):
"""
Return a list of revision(s) that match the given status:
- ``good``, ``bad``, ``skip``: as the names imply
- ``range`` : all csets taking part in the bisection
- ``pruned`` : csets that are good, bad or skipped
- ``untested`` : csets whose fate is yet unknown
- ``ignored`` : csets ignored due to DAG topology
"""
state = load_state(repo)
if status in ('good', 'bad', 'skip'):
return [repo.changelog.rev(n) for n in state[status]]
else:
# In the floowing sets, we do *not* call 'bisect()' with more
# than one level of recusrsion, because that can be very, very
# time consuming. Instead, we always develop the expression as
# much as possible.
# 'range' is all csets that make the bisection:
# - have a good ancestor and a bad descendant, or conversely
# that's because the bisection can go either way
range = '( bisect(bad)::bisect(good) | bisect(good)::bisect(bad) )'
# 'pruned' is all csets whose fate is already known:
# - a good ancestor and a good ascendant, or
# - a bad ancestor and a bad descendant, or
# - skipped
# But in case of irrelevant goods/bads, we also need to
# include them.
pg = 'bisect(good)::bisect(good)' # Pruned goods
pb = 'bisect(bad)::bisect(bad)' # Pruned bads
ps = 'bisect(skip)' # Pruned skipped
pruned = '( (%s) | (%s) | (%s) )' % (pg, pb, ps)
# 'untested' is all cset that are- in 'range', but not in 'pruned'
untested = '( (%s) - (%s) )' % (range, pruned)
# 'ignored' is all csets that were not used during the bisection
# due to DAG topology, but may however have had an impact.
# Eg., a branch merged between bads and goods, but whose branch-
# point is out-side of the range.
iba = '::bisect(bad) - ::bisect(good)' # Ignored bads' ancestors
iga = '::bisect(good) - ::bisect(bad)' # Ignored goods' ancestors
ignored = '( ( (%s) | (%s) ) - (%s) )' % (iba, iga, range)
if status == 'range':
return [c.rev() for c in repo.set(range)]
elif status == 'pruned':
return [c.rev() for c in repo.set(pruned)]
elif status == 'untested':
return [c.rev() for c in repo.set(untested)]
elif status == 'ignored':
return [c.rev() for c in repo.set(ignored)]
else:
raise error.ParseError(_('invalid bisect state'))