revsets: remove transitive edges in graph iterator by default

The git.git repo seems to have lots of merges from far back in the
history into newer history. That results in `jj log -r 'git_refs()'`
being completely useless because of the number of such edges. For
example, v2.31.0 has almost 600 edges going out of it and presumably
merging (forking) back into various different previous versions. Git,
unlike Mercurial, seems to remove an edge from the graph if the edge
can also be reached via a longer path. This commit makes it so we also
do that (i.e. the filtered graph is a transitive reduction of the
graph before filtering).

This slows down `jj log -r ,,v2.0.0 -T ""` by about 2%. That's still
small enough that it doesn't seem worth it to have a separate iterator
for contiguous ranges (which would be an option).
This commit is contained in:
Martin von Zweigbergk 2021-04-29 21:43:12 -07:00
parent 33da97f0bf
commit aef27d5701
2 changed files with 254 additions and 30 deletions

View File

@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::min;
use std::collections::{BTreeMap, HashSet};
use crate::index::{IndexEntry, IndexPosition};
@ -83,6 +84,40 @@ pub enum RevsetGraphEdgeType {
// from an external commit are missing, we consider the edge to that edge to
// also be missing. In the example above, that means that "B" will have a
// missing edge to "d" rather than to the root.
//
// The iterator can be configured to skip transitive edges that it would
// otherwise return. In this mode (which is the default), the edge from "A" to
// "E" in the example above would be excluded because there's also a transitive
// path from "A" to "E" via "B". The implementation of that mode
// adds a filtering step just before yielding the edges for a commit. The
// filtering works doing a DFS in the simplified graph. That may require even
// more look-ahead. Consider this example (uppercase characters are in the input
// set):
//
// J
// /|
// | i
// | |\
// | | H
// G | |
// | e f
// | \|\
// | D |
// \ / c
// b /
// |/
// A
// |
// root
//
// When walking from "J", we'll find indirect edges to "H", "G", and "D". This
// is our unfiltered set of edges, before removing transitive edges. In order to
// know that "D" is an ancestor of "H", we need to also walk from "H". We use
// the same search for finding edges from "H" as we used from "J". That results
// in looking ahead all the way to "A". We could reduce the amount of look-ahead
// by stopping at "c" since we're only interested in edges that could lead to
// "D", but that would require extra book-keeping to remember for later that the
// edges from "f" and "H" are only partially computed.
pub struct RevsetGraphIterator<'revset, 'repo> {
input_set_iter: RevsetIterator<'revset, 'repo>,
// Commits in the input set we had to take out of the iterator while walking external
@ -93,7 +128,8 @@ pub struct RevsetGraphIterator<'revset, 'repo> {
min_position: IndexPosition,
// Edges for commits not in the input set.
// TODO: Remove unneeded entries here as we go (that's why it's an ordered map)?
external_commits: BTreeMap<IndexPosition, HashSet<RevsetGraphEdge>>,
edges: BTreeMap<IndexPosition, HashSet<RevsetGraphEdge>>,
skip_transitive_edges: bool,
}
impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
@ -102,10 +138,16 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
input_set_iter: iter,
look_ahead: Default::default(),
min_position: IndexPosition::MAX,
external_commits: Default::default(),
edges: Default::default(),
skip_transitive_edges: true,
}
}
pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self {
self.skip_transitive_edges = skip_transitive_edges;
self
}
fn next_index_entry(&mut self) -> Option<IndexEntry<'repo>> {
if let Some((_, index_entry)) = self.look_ahead.pop_last() {
return Some(index_entry);
@ -117,6 +159,9 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
&mut self,
index_entry: &IndexEntry<'repo>,
) -> HashSet<RevsetGraphEdge> {
if let Some(edges) = self.edges.get(&index_entry.position()) {
return edges.clone();
}
let mut edges = HashSet::new();
for parent in index_entry.parents() {
let parent_position = parent.position();
@ -135,6 +180,7 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
}
}
}
self.edges.insert(index_entry.position(), edges.clone());
edges
}
@ -154,7 +200,7 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
if self.look_ahead.contains_key(&parent_position) {
// We have found a path back into the input set
edges.insert(RevsetGraphEdge::indirect(parent_position));
} else if let Some(parent_edges) = self.external_commits.get(&parent_position) {
} else if let Some(parent_edges) = self.edges.get(&parent_position) {
if parent_edges
.iter()
.all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
@ -175,10 +221,59 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
}
if parents_complete {
stack.pop().unwrap();
self.external_commits.insert(position, edges);
self.edges.insert(position, edges);
}
}
self.external_commits.get(&position).unwrap().clone()
self.edges.get(&position).unwrap().clone()
}
fn remove_transitive_edges(
&mut self,
edges: HashSet<RevsetGraphEdge>,
) -> HashSet<RevsetGraphEdge> {
if !edges
.iter()
.any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect)
{
return edges;
}
let mut min_generation = u32::MAX;
let mut initial_targets = HashSet::new();
let mut work = vec![];
// To start with, add the edges one step after the input edges.
for edge in &edges {
initial_targets.insert(edge.target);
if edge.edge_type != RevsetGraphEdgeType::Missing {
let entry = self.look_ahead.get(&edge.target).unwrap().clone();
min_generation = min(min_generation, entry.generation_number());
work.extend(self.edges_from_internal_commit(&entry));
}
}
// Find commits reachable transitively and add them to the `unwanted` set.
let mut unwanted = HashSet::new();
while let Some(edge) = work.pop() {
if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position {
continue;
}
if !unwanted.insert(edge.target) {
// Already visited
continue;
}
if initial_targets.contains(&edge.target) {
// Already visited
continue;
}
let entry = self.look_ahead.get(&edge.target).unwrap().clone();
if entry.generation_number() < min_generation {
continue;
}
work.extend(self.edges_from_internal_commit(&entry));
}
edges
.into_iter()
.filter(|edge| !unwanted.contains(&edge.target))
.collect()
}
fn consume_to(&mut self, pos: IndexPosition) {
@ -199,7 +294,10 @@ impl<'revset, 'repo> Iterator for RevsetGraphIterator<'revset, 'repo> {
fn next(&mut self) -> Option<Self::Item> {
if let Some(index_entry) = self.next_index_entry() {
let edges = self.edges_from_internal_commit(&index_entry);
let mut edges = self.edges_from_internal_commit(&index_entry);
if self.skip_transitive_edges {
edges = self.remove_transitive_edges(edges);
}
Some((index_entry, edges))
} else {
None

View File

@ -17,9 +17,11 @@ use jujube_lib::revset_graph_iterator::RevsetGraphEdge;
use jujube_lib::testutils;
use jujube_lib::testutils::CommitGraphBuilder;
use maplit::hashset;
use test_case::test_case;
#[test]
fn test_graph_iterator_linearized() {
#[test_case(false ; "keep transitive edges")]
#[test_case(true ; "skip transitive edges")]
fn test_graph_iterator_linearized(skip_transitive_edges: bool) {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
@ -45,7 +47,11 @@ fn test_graph_iterator_linearized() {
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_d]);
let commits: Vec<_> = revset.iter().graph().collect();
let commits: Vec<_> = revset
.iter()
.graph()
.set_skip_transitive_edges(skip_transitive_edges)
.collect();
drop(revset);
assert_eq!(commits.len(), 2);
assert_eq!(commits[0].0.commit_id(), *commit_d.id());
@ -56,8 +62,9 @@ fn test_graph_iterator_linearized() {
tx.discard();
}
#[test]
fn test_graph_iterator_virtual_octopus() {
#[test_case(false ; "keep transitive edges")]
#[test_case(true ; "skip transitive edges")]
fn test_graph_iterator_virtual_octopus(skip_transitive_edges: bool) {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
@ -90,7 +97,11 @@ fn test_graph_iterator_virtual_octopus() {
mut_repo.as_repo_ref(),
&[&commit_a, &commit_b, &commit_c, &commit_f],
);
let commits: Vec<_> = revset.iter().graph().collect();
let commits: Vec<_> = revset
.iter()
.graph()
.set_skip_transitive_edges(skip_transitive_edges)
.collect();
drop(revset);
assert_eq!(commits.len(), 4);
assert_eq!(commits[0].0.commit_id(), *commit_f.id());
@ -112,8 +123,9 @@ fn test_graph_iterator_virtual_octopus() {
tx.discard();
}
#[test]
fn test_graph_iterator_simple_fork() {
#[test_case(false ; "keep transitive edges")]
#[test_case(true ; "skip transitive edges")]
fn test_graph_iterator_simple_fork(skip_transitive_edges: bool) {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
@ -143,7 +155,11 @@ fn test_graph_iterator_simple_fork() {
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_c, &commit_e]);
let commits: Vec<_> = revset.iter().graph().collect();
let commits: Vec<_> = revset
.iter()
.graph()
.set_skip_transitive_edges(skip_transitive_edges)
.collect();
drop(revset);
assert_eq!(commits.len(), 3);
assert_eq!(commits[0].0.commit_id(), *commit_e.id());
@ -156,15 +172,17 @@ fn test_graph_iterator_simple_fork() {
tx.discard();
}
#[test]
fn test_graph_iterator_multiple_missing() {
#[test_case(false ; "keep transitive edges")]
#[test_case(true ; "skip transitive edges")]
fn test_graph_iterator_multiple_missing(skip_transitive_edges: bool) {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests that we get missing edges to "a" and "c" and not just one missing edge
// to the root. F
// to the root.
// F
// / \ F
// d e => /|\
// |\ /| ~ B ~
@ -187,7 +205,11 @@ fn test_graph_iterator_multiple_missing() {
let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_b, &commit_f]);
let commits: Vec<_> = revset.iter().graph().collect();
let commits: Vec<_> = revset
.iter()
.graph()
.set_skip_transitive_edges(skip_transitive_edges)
.collect();
drop(revset);
assert_eq!(commits.len(), 2);
assert_eq!(commits[0].0.commit_id(), *commit_f.id());
@ -205,14 +227,17 @@ fn test_graph_iterator_multiple_missing() {
tx.discard();
}
#[test]
fn test_graph_iterator_edge_to_ancestor() {
#[test_case(false ; "keep transitive edges")]
#[test_case(true ; "skip transitive edges")]
fn test_graph_iterator_edge_to_ancestor(skip_transitive_edges: bool) {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests that we get both an edge from F to D and to D's ancestor C:
// Tests that we get both an edge from F to D and to D's ancestor C if we keep
// transitive edges and only the edge from F to D if we skip transitive
// edges:
// F F
// |\ |\
// D e D :
@ -235,19 +260,27 @@ fn test_graph_iterator_edge_to_ancestor() {
let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_c, &commit_d, &commit_f]);
let commits: Vec<_> = revset.iter().graph().collect();
let commits: Vec<_> = revset
.iter()
.graph()
.set_skip_transitive_edges(skip_transitive_edges)
.collect();
drop(revset);
assert_eq!(commits.len(), 3);
assert_eq!(commits[0].0.commit_id(), *commit_f.id());
assert_eq!(commits[1].0.commit_id(), *commit_d.id());
assert_eq!(commits[2].0.commit_id(), *commit_c.id());
assert_eq!(
commits[0].1,
hashset![
RevsetGraphEdge::indirect(pos_c),
RevsetGraphEdge::direct(pos_d)
]
);
if skip_transitive_edges {
assert_eq!(commits[0].1, hashset![RevsetGraphEdge::direct(pos_d)]);
} else {
assert_eq!(
commits[0].1,
hashset![
RevsetGraphEdge::indirect(pos_c),
RevsetGraphEdge::direct(pos_d)
]
);
}
assert_eq!(
commits[1].1,
hashset![
@ -259,3 +292,96 @@ fn test_graph_iterator_edge_to_ancestor() {
tx.discard();
}
#[test_case(false ; "keep transitive edges")]
#[test_case(true ; "skip transitive edges")]
fn test_graph_iterator_edge_escapes_from_(skip_transitive_edges: bool) {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests a more complex case for skipping transitive edges.
// J
// /|
// | i J
// | |\ /:
// | | H | H
// G | | G :
// | e f => : D
// | \|\ :/
// | D | A
// \ / c |
// b / root
// |/
// A
// |
// root
let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo);
let commit_a = graph_builder.initial_commit();
let commit_b = graph_builder.commit_with_parents(&[&commit_a]);
let commit_c = graph_builder.commit_with_parents(&[&commit_a]);
let commit_d = graph_builder.commit_with_parents(&[&commit_b]);
let commit_e = graph_builder.commit_with_parents(&[&commit_d]);
let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_c]);
let commit_g = graph_builder.commit_with_parents(&[&commit_b]);
let commit_h = graph_builder.commit_with_parents(&[&commit_f]);
let commit_i = graph_builder.commit_with_parents(&[&commit_e, &commit_h]);
let commit_j = graph_builder.commit_with_parents(&[&commit_g, &commit_i]);
let pos_root = mut_repo
.index()
.commit_id_to_pos(repo.store().root_commit_id())
.unwrap();
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap();
let pos_g = mut_repo.index().commit_id_to_pos(commit_g.id()).unwrap();
let pos_h = mut_repo.index().commit_id_to_pos(commit_h.id()).unwrap();
let revset = revset_for_commits(
mut_repo.as_repo_ref(),
&[&commit_a, &commit_d, &commit_g, &commit_h, &commit_j],
);
let commits: Vec<_> = revset
.iter()
.graph()
.set_skip_transitive_edges(skip_transitive_edges)
.collect();
drop(revset);
assert_eq!(commits.len(), 5);
assert_eq!(commits[0].0.commit_id(), *commit_j.id());
assert_eq!(commits[1].0.commit_id(), *commit_h.id());
assert_eq!(commits[2].0.commit_id(), *commit_g.id());
assert_eq!(commits[3].0.commit_id(), *commit_d.id());
assert_eq!(commits[4].0.commit_id(), *commit_a.id());
if skip_transitive_edges {
assert_eq!(
commits[0].1,
hashset![
RevsetGraphEdge::indirect(pos_h),
RevsetGraphEdge::direct(pos_g)
]
);
assert_eq!(commits[1].1, hashset![RevsetGraphEdge::indirect(pos_d)]);
} else {
assert_eq!(
commits[0].1,
hashset![
RevsetGraphEdge::indirect(pos_h),
RevsetGraphEdge::direct(pos_g),
RevsetGraphEdge::indirect(pos_d),
]
);
assert_eq!(
commits[1].1,
hashset![
RevsetGraphEdge::indirect(pos_d),
RevsetGraphEdge::indirect(pos_a)
]
);
}
assert_eq!(commits[2].1, hashset![RevsetGraphEdge::indirect(pos_a)]);
assert_eq!(commits[3].1, hashset![RevsetGraphEdge::indirect(pos_a)]);
assert_eq!(commits[4].1, hashset![RevsetGraphEdge::missing(pos_root)]);
tx.discard();
}