From 27a7fccefa37f376906877de3b692e2020b562f4 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Mon, 20 Mar 2023 16:19:19 -0700 Subject: [PATCH] revset: add a method returning a change id index One of the remaining places we depend on index positions is when creating a `ChangeIdIndex`. This moves that into the revset engine (which is coupled to the commit index implementation) by adding a `Revset::change_id_index()` method. We will also use this function later when add support for resolving change id prefixes within a small revset. The current implementation simply creates an in-memory index using the existing `IdIndex` we have in `repo.rs`. The custom implementation at Google might do the same for small revsets that are available on the client, but for revsets involving many commits on the server, it might use a suboptimmal implementation that uses longer-than-necessary prefixes for performance reasons. That can be done by querying a server-side index including changes not in the revset, and then verifying that the resulting commits are actually in the revset. --- lib/src/default_index_store.rs | 10 +-- lib/src/default_revset_engine.rs | 52 +++++++++++++-- lib/src/revset.rs | 25 ++++++- lib/tests/test_revset.rs | 109 ++++++++++++++++++++++++++++++- 4 files changed, 182 insertions(+), 14 deletions(-) diff --git a/lib/src/default_index_store.rs b/lib/src/default_index_store.rs index b40056610..d4429267f 100644 --- a/lib/src/default_index_store.rs +++ b/lib/src/default_index_store.rs @@ -741,7 +741,7 @@ impl Index for MutableIndexImpl { repo: &'index dyn Repo, expression: &RevsetExpression, ) -> Result + 'index>, RevsetError> { - default_revset_engine::evaluate(repo, expression) + default_revset_engine::evaluate(repo, CompositeIndex(self), expression) } } @@ -833,7 +833,7 @@ trait IndexSegment { } #[derive(Clone)] -struct CompositeIndex<'a>(&'a dyn IndexSegment); +pub struct CompositeIndex<'a>(&'a dyn IndexSegment); impl<'a> CompositeIndex<'a> { fn ancestor_files_without_local(&self) -> impl Iterator> { @@ -890,7 +890,7 @@ impl<'a> CompositeIndex<'a> { } } - fn entry_by_pos(&self, pos: IndexPosition) -> IndexEntry<'a> { + pub fn entry_by_pos(&self, pos: IndexPosition) -> IndexEntry<'a> { let num_parent_commits = self.0.segment_num_parent_commits(); if pos.0 >= num_parent_commits { self.0.segment_entry_by_pos(pos, pos.0 - num_parent_commits) @@ -960,7 +960,7 @@ impl<'a> CompositeIndex<'a> { }) } - fn entry_by_id(&self, commit_id: &CommitId) -> Option> { + pub(crate) fn entry_by_id(&self, commit_id: &CommitId) -> Option> { self.commit_id_to_pos(commit_id) .map(|pos| self.entry_by_pos(pos)) } @@ -1822,7 +1822,7 @@ impl Index for ReadonlyIndexImpl { repo: &'index dyn Repo, expression: &RevsetExpression, ) -> Result + 'index>, RevsetError> { - default_revset_engine::evaluate(repo, expression) + default_revset_engine::evaluate(repo, CompositeIndex(self), expression) } } diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 49fd0896c..ccf592b3c 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -18,13 +18,14 @@ use std::iter::Peekable; use itertools::Itertools; -use crate::backend::CommitId; -use crate::default_index_store::IndexEntry; +use crate::backend::{ChangeId, CommitId}; +use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition}; use crate::default_revset_graph_iterator::RevsetGraphIterator; +use crate::index::{HexPrefix, PrefixResolution}; use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher}; -use crate::repo::Repo; +use crate::repo::{IdIndex, Repo}; use crate::revset::{ - Revset, RevsetError, RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge, + ChangeIdIndex, Revset, RevsetError, RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge, RevsetIteratorExt, GENERATION_RANGE_FULL, }; use crate::rewrite; @@ -52,11 +53,18 @@ trait InternalRevset<'index>: ToPredicateFn<'index> { struct RevsetImpl<'index> { inner: Box + 'index>, + index: CompositeIndex<'index>, } impl<'index> RevsetImpl<'index> { - fn new(revset: Box + 'index>) -> Self { - Self { inner: revset } + fn new( + revset: Box + 'index>, + index: CompositeIndex<'index>, + ) -> Self { + Self { + inner: revset, + index, + } } } @@ -69,11 +77,40 @@ impl<'index> Revset<'index> for RevsetImpl<'index> { Box::new(RevsetGraphIterator::new(self)) } + fn change_id_index(&self) -> Box { + // TODO: Create a persistent lookup from change id to commit ids. + let mut pos_by_change = vec![]; + for entry in self.inner.iter() { + pos_by_change.push((entry.change_id(), entry.position())); + } + let pos_by_change = IdIndex::from_vec(pos_by_change); + Box::new(ChangeIdIndexImpl { + index: self.index.clone(), + pos_by_change, + }) + } + fn is_empty(&self) -> bool { self.iter().next().is_none() } } +struct ChangeIdIndexImpl<'index> { + index: CompositeIndex<'index>, + pos_by_change: IdIndex, +} + +impl ChangeIdIndex for ChangeIdIndexImpl<'_> { + fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution> { + self.pos_by_change + .resolve_prefix_with(prefix, |pos| self.index.entry_by_pos(*pos).commit_id()) + } + + fn shortest_unique_prefix_len(&self, change_id: &ChangeId) -> usize { + self.pos_by_change.shortest_unique_prefix_len(change_id) + } +} + struct EagerRevset<'index> { index_entries: Vec>, } @@ -376,10 +413,11 @@ impl<'index, I1: Iterator>, I2: Iterator( repo: &'index dyn Repo, + index: CompositeIndex<'index>, expression: &RevsetExpression, ) -> Result + 'index>, RevsetError> { let internal_revset = internal_evaluate(repo, expression)?; - Ok(Box::new(RevsetImpl::new(internal_revset))) + Ok(Box::new(RevsetImpl::new(internal_revset, index))) } fn internal_evaluate<'index>( diff --git a/lib/src/revset.rs b/lib/src/revset.rs index 9f4c1701d..6e79e147d 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -27,7 +27,7 @@ use pest::Parser; use pest_derive::Parser; use thiserror::Error; -use crate::backend::{BackendError, BackendResult, CommitId, ObjectId}; +use crate::backend::{BackendError, BackendResult, ChangeId, CommitId, ObjectId}; use crate::commit::Commit; use crate::default_index_store::IndexEntry; use crate::hex_util::to_forward_hex; @@ -1577,9 +1577,32 @@ pub trait Revset<'index> { fn iter_graph(&self) -> Box)> + '_>; + fn change_id_index(&self) -> Box; + fn is_empty(&self) -> bool; } +pub trait ChangeIdIndex { + /// Resolve an unambiguous change ID prefix to the commit IDs in the revset. + fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution>; + + /// This function returns the shortest length of a prefix of `key` that + /// disambiguates it from every other key in the index. + /// + /// The length to be returned is a number of hexadecimal digits. + /// + /// This has some properties that we do not currently make much use of: + /// + /// - The algorithm works even if `key` itself is not in the index. + /// + /// - In the special case when there are keys in the trie for which our + /// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in + /// order to disambiguate, you need every letter of the key *and* the + /// additional fact that it's the entire key). This case is extremely + /// unlikely for hashes with 12+ hexadecimal characters. + fn shortest_unique_prefix_len(&self, change_id: &ChangeId) -> usize; +} + #[derive(Debug, PartialEq, Eq, Clone, Hash)] pub struct RevsetGraphEdge { pub target: CommitId, diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index aed557492..a0cefb788 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -16,9 +16,10 @@ use std::path::Path; use assert_matches::assert_matches; use itertools::Itertools; -use jujutsu_lib::backend::{CommitId, MillisSinceEpoch, ObjectId, Signature, Timestamp}; +use jujutsu_lib::backend::{ChangeId, CommitId, MillisSinceEpoch, ObjectId, Signature, Timestamp}; use jujutsu_lib::commit::Commit; use jujutsu_lib::git; +use jujutsu_lib::index::{HexPrefix, PrefixResolution}; use jujutsu_lib::op_store::{RefTarget, WorkspaceId}; use jujutsu_lib::repo::Repo; use jujutsu_lib::repo_path::RepoPath; @@ -2101,3 +2102,109 @@ fn test_reverse_graph_iterator() { ); assert_eq!(commits[4].1, vec![]); } + +#[test] +fn test_change_id_index() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(true); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings, "test"); + + let root_commit = repo.store().root_commit(); + let mut commit_number = 0; + let mut commit_with_change_id = |change_id: &str| { + commit_number += 1; + tx.mut_repo() + .new_commit( + &settings, + vec![root_commit.id().clone()], + root_commit.tree_id().clone(), + ) + .set_change_id(ChangeId::from_hex(change_id)) + .set_description(format!("commit {commit_number}")) + .write() + .unwrap() + }; + let commit_1 = commit_with_change_id("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + let commit_2 = commit_with_change_id("aaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + let commit_3 = commit_with_change_id("abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + let commit_4 = commit_with_change_id("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + let commit_5 = commit_with_change_id("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + + let revset = revset_for_commits( + tx.repo(), + &[ + &root_commit, + &commit_1, + &commit_2, + &commit_3, + &commit_4, + &commit_5, + ], + ); + let change_id_index = revset.change_id_index(); + let prefix_len = + |commit: &Commit| change_id_index.shortest_unique_prefix_len(commit.change_id()); + assert_eq!(prefix_len(&root_commit), 1); + assert_eq!(prefix_len(&commit_1), 6); + assert_eq!(prefix_len(&commit_2), 6); + assert_eq!(prefix_len(&commit_3), 2); + assert_eq!(prefix_len(&commit_4), 1); + assert_eq!(prefix_len(&commit_5), 1); + let resolve_prefix = + |prefix: &str| change_id_index.resolve_prefix(&HexPrefix::new(prefix).unwrap()); + // Ambiguous matches + assert_eq!(resolve_prefix("a"), PrefixResolution::AmbiguousMatch); + assert_eq!(resolve_prefix("aaaaa"), PrefixResolution::AmbiguousMatch); + // Exactly the necessary length + assert_eq!( + resolve_prefix("0"), + PrefixResolution::SingleMatch(vec![root_commit.id().clone()]) + ); + assert_eq!( + resolve_prefix("aaaaaa"), + PrefixResolution::SingleMatch(vec![commit_1.id().clone()]) + ); + assert_eq!( + resolve_prefix("aaaaab"), + PrefixResolution::SingleMatch(vec![commit_2.id().clone()]) + ); + assert_eq!( + resolve_prefix("ab"), + PrefixResolution::SingleMatch(vec![commit_3.id().clone()]) + ); + assert_eq!( + resolve_prefix("b"), + PrefixResolution::SingleMatch(vec![commit_5.id().clone(), commit_4.id().clone()]) + ); + // Longer than necessary + assert_eq!( + resolve_prefix("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + PrefixResolution::SingleMatch(vec![commit_1.id().clone()]) + ); + // No match + assert_eq!(resolve_prefix("ba"), PrefixResolution::NoMatch); + + // Test with a revset containing only some of the commits. We should get shorter + // prefixes and be able to resolve shorter prefixes. + let revset = revset_for_commits(tx.repo(), &[&commit_2, &commit_3]); + let change_id_index = revset.change_id_index(); + let prefix_len = + |commit: &Commit| change_id_index.shortest_unique_prefix_len(commit.change_id()); + assert_eq!(prefix_len(&commit_1), 6); + assert_eq!(prefix_len(&commit_2), 2); + assert_eq!(prefix_len(&commit_3), 2); + let resolve_prefix = + |prefix: &str| change_id_index.resolve_prefix(&HexPrefix::new(prefix).unwrap()); + assert_eq!(resolve_prefix("0"), PrefixResolution::NoMatch); + assert_eq!( + resolve_prefix("aa"), + PrefixResolution::SingleMatch(vec![commit_2.id().clone()]) + ); + assert_eq!( + resolve_prefix("ab"), + PrefixResolution::SingleMatch(vec![commit_3.id().clone()]) + ); + assert_eq!(resolve_prefix("a"), PrefixResolution::AmbiguousMatch); +}