revset: add a method returning a change id index

One of the remaining places we depend on index positions is when
creating a `ChangeIdIndex`. This moves that into the revset engine
(which is coupled to the commit index implementation) by adding a
`Revset::change_id_index()` method. We will also use this function
later when add support for resolving change id prefixes within a small
revset.

The current implementation simply creates an in-memory index using the
existing `IdIndex` we have in `repo.rs`.

The custom implementation at Google might do the same for small
revsets that are available on the client, but for revsets involving
many commits on the server, it might use a suboptimmal implementation
that uses longer-than-necessary prefixes for performance reasons. That
can be done by querying a server-side index including changes not in
the revset, and then verifying that the resulting commits are actually
in the revset.
This commit is contained in:
Martin von Zweigbergk 2023-03-20 16:19:19 -07:00 committed by Martin von Zweigbergk
parent a65e0e771c
commit 27a7fccefa
4 changed files with 182 additions and 14 deletions

View File

@ -741,7 +741,7 @@ impl Index for MutableIndexImpl {
repo: &'index dyn Repo,
expression: &RevsetExpression,
) -> Result<Box<dyn Revset<'index> + 'index>, RevsetError> {
default_revset_engine::evaluate(repo, expression)
default_revset_engine::evaluate(repo, CompositeIndex(self), expression)
}
}
@ -833,7 +833,7 @@ trait IndexSegment {
}
#[derive(Clone)]
struct CompositeIndex<'a>(&'a dyn IndexSegment);
pub struct CompositeIndex<'a>(&'a dyn IndexSegment);
impl<'a> CompositeIndex<'a> {
fn ancestor_files_without_local(&self) -> impl Iterator<Item = &Arc<ReadonlyIndexImpl>> {
@ -890,7 +890,7 @@ impl<'a> CompositeIndex<'a> {
}
}
fn entry_by_pos(&self, pos: IndexPosition) -> IndexEntry<'a> {
pub fn entry_by_pos(&self, pos: IndexPosition) -> IndexEntry<'a> {
let num_parent_commits = self.0.segment_num_parent_commits();
if pos.0 >= num_parent_commits {
self.0.segment_entry_by_pos(pos, pos.0 - num_parent_commits)
@ -960,7 +960,7 @@ impl<'a> CompositeIndex<'a> {
})
}
fn entry_by_id(&self, commit_id: &CommitId) -> Option<IndexEntry<'a>> {
pub(crate) fn entry_by_id(&self, commit_id: &CommitId) -> Option<IndexEntry<'a>> {
self.commit_id_to_pos(commit_id)
.map(|pos| self.entry_by_pos(pos))
}
@ -1822,7 +1822,7 @@ impl Index for ReadonlyIndexImpl {
repo: &'index dyn Repo,
expression: &RevsetExpression,
) -> Result<Box<dyn Revset<'index> + 'index>, RevsetError> {
default_revset_engine::evaluate(repo, expression)
default_revset_engine::evaluate(repo, CompositeIndex(self), expression)
}
}

View File

@ -18,13 +18,14 @@ use std::iter::Peekable;
use itertools::Itertools;
use crate::backend::CommitId;
use crate::default_index_store::IndexEntry;
use crate::backend::{ChangeId, CommitId};
use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition};
use crate::default_revset_graph_iterator::RevsetGraphIterator;
use crate::index::{HexPrefix, PrefixResolution};
use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher};
use crate::repo::Repo;
use crate::repo::{IdIndex, Repo};
use crate::revset::{
Revset, RevsetError, RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge,
ChangeIdIndex, Revset, RevsetError, RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge,
RevsetIteratorExt, GENERATION_RANGE_FULL,
};
use crate::rewrite;
@ -52,11 +53,18 @@ trait InternalRevset<'index>: ToPredicateFn<'index> {
struct RevsetImpl<'index> {
inner: Box<dyn InternalRevset<'index> + 'index>,
index: CompositeIndex<'index>,
}
impl<'index> RevsetImpl<'index> {
fn new(revset: Box<dyn InternalRevset<'index> + 'index>) -> Self {
Self { inner: revset }
fn new(
revset: Box<dyn InternalRevset<'index> + 'index>,
index: CompositeIndex<'index>,
) -> Self {
Self {
inner: revset,
index,
}
}
}
@ -69,11 +77,40 @@ impl<'index> Revset<'index> for RevsetImpl<'index> {
Box::new(RevsetGraphIterator::new(self))
}
fn change_id_index(&self) -> Box<dyn ChangeIdIndex + 'index> {
// TODO: Create a persistent lookup from change id to commit ids.
let mut pos_by_change = vec![];
for entry in self.inner.iter() {
pos_by_change.push((entry.change_id(), entry.position()));
}
let pos_by_change = IdIndex::from_vec(pos_by_change);
Box::new(ChangeIdIndexImpl {
index: self.index.clone(),
pos_by_change,
})
}
fn is_empty(&self) -> bool {
self.iter().next().is_none()
}
}
struct ChangeIdIndexImpl<'index> {
index: CompositeIndex<'index>,
pos_by_change: IdIndex<ChangeId, IndexPosition>,
}
impl ChangeIdIndex for ChangeIdIndexImpl<'_> {
fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<CommitId>> {
self.pos_by_change
.resolve_prefix_with(prefix, |pos| self.index.entry_by_pos(*pos).commit_id())
}
fn shortest_unique_prefix_len(&self, change_id: &ChangeId) -> usize {
self.pos_by_change.shortest_unique_prefix_len(change_id)
}
}
struct EagerRevset<'index> {
index_entries: Vec<IndexEntry<'index>>,
}
@ -376,10 +413,11 @@ impl<'index, I1: Iterator<Item = IndexEntry<'index>>, I2: Iterator<Item = IndexE
pub fn evaluate<'index>(
repo: &'index dyn Repo,
index: CompositeIndex<'index>,
expression: &RevsetExpression,
) -> Result<Box<dyn Revset<'index> + 'index>, RevsetError> {
let internal_revset = internal_evaluate(repo, expression)?;
Ok(Box::new(RevsetImpl::new(internal_revset)))
Ok(Box::new(RevsetImpl::new(internal_revset, index)))
}
fn internal_evaluate<'index>(

View File

@ -27,7 +27,7 @@ use pest::Parser;
use pest_derive::Parser;
use thiserror::Error;
use crate::backend::{BackendError, BackendResult, CommitId, ObjectId};
use crate::backend::{BackendError, BackendResult, ChangeId, CommitId, ObjectId};
use crate::commit::Commit;
use crate::default_index_store::IndexEntry;
use crate::hex_util::to_forward_hex;
@ -1577,9 +1577,32 @@ pub trait Revset<'index> {
fn iter_graph(&self) -> Box<dyn Iterator<Item = (CommitId, Vec<RevsetGraphEdge>)> + '_>;
fn change_id_index(&self) -> Box<dyn ChangeIdIndex + 'index>;
fn is_empty(&self) -> bool;
}
pub trait ChangeIdIndex {
/// Resolve an unambiguous change ID prefix to the commit IDs in the revset.
fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<CommitId>>;
/// This function returns the shortest length of a prefix of `key` that
/// disambiguates it from every other key in the index.
///
/// The length to be returned is a number of hexadecimal digits.
///
/// This has some properties that we do not currently make much use of:
///
/// - The algorithm works even if `key` itself is not in the index.
///
/// - In the special case when there are keys in the trie for which our
/// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in
/// order to disambiguate, you need every letter of the key *and* the
/// additional fact that it's the entire key). This case is extremely
/// unlikely for hashes with 12+ hexadecimal characters.
fn shortest_unique_prefix_len(&self, change_id: &ChangeId) -> usize;
}
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct RevsetGraphEdge {
pub target: CommitId,

View File

@ -16,9 +16,10 @@ use std::path::Path;
use assert_matches::assert_matches;
use itertools::Itertools;
use jujutsu_lib::backend::{CommitId, MillisSinceEpoch, ObjectId, Signature, Timestamp};
use jujutsu_lib::backend::{ChangeId, CommitId, MillisSinceEpoch, ObjectId, Signature, Timestamp};
use jujutsu_lib::commit::Commit;
use jujutsu_lib::git;
use jujutsu_lib::index::{HexPrefix, PrefixResolution};
use jujutsu_lib::op_store::{RefTarget, WorkspaceId};
use jujutsu_lib::repo::Repo;
use jujutsu_lib::repo_path::RepoPath;
@ -2101,3 +2102,109 @@ fn test_reverse_graph_iterator() {
);
assert_eq!(commits[4].1, vec![]);
}
#[test]
fn test_change_id_index() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init(true);
let repo = &test_repo.repo;
let mut tx = repo.start_transaction(&settings, "test");
let root_commit = repo.store().root_commit();
let mut commit_number = 0;
let mut commit_with_change_id = |change_id: &str| {
commit_number += 1;
tx.mut_repo()
.new_commit(
&settings,
vec![root_commit.id().clone()],
root_commit.tree_id().clone(),
)
.set_change_id(ChangeId::from_hex(change_id))
.set_description(format!("commit {commit_number}"))
.write()
.unwrap()
};
let commit_1 = commit_with_change_id("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
let commit_2 = commit_with_change_id("aaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
let commit_3 = commit_with_change_id("abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
let commit_4 = commit_with_change_id("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
let commit_5 = commit_with_change_id("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
let revset = revset_for_commits(
tx.repo(),
&[
&root_commit,
&commit_1,
&commit_2,
&commit_3,
&commit_4,
&commit_5,
],
);
let change_id_index = revset.change_id_index();
let prefix_len =
|commit: &Commit| change_id_index.shortest_unique_prefix_len(commit.change_id());
assert_eq!(prefix_len(&root_commit), 1);
assert_eq!(prefix_len(&commit_1), 6);
assert_eq!(prefix_len(&commit_2), 6);
assert_eq!(prefix_len(&commit_3), 2);
assert_eq!(prefix_len(&commit_4), 1);
assert_eq!(prefix_len(&commit_5), 1);
let resolve_prefix =
|prefix: &str| change_id_index.resolve_prefix(&HexPrefix::new(prefix).unwrap());
// Ambiguous matches
assert_eq!(resolve_prefix("a"), PrefixResolution::AmbiguousMatch);
assert_eq!(resolve_prefix("aaaaa"), PrefixResolution::AmbiguousMatch);
// Exactly the necessary length
assert_eq!(
resolve_prefix("0"),
PrefixResolution::SingleMatch(vec![root_commit.id().clone()])
);
assert_eq!(
resolve_prefix("aaaaaa"),
PrefixResolution::SingleMatch(vec![commit_1.id().clone()])
);
assert_eq!(
resolve_prefix("aaaaab"),
PrefixResolution::SingleMatch(vec![commit_2.id().clone()])
);
assert_eq!(
resolve_prefix("ab"),
PrefixResolution::SingleMatch(vec![commit_3.id().clone()])
);
assert_eq!(
resolve_prefix("b"),
PrefixResolution::SingleMatch(vec![commit_5.id().clone(), commit_4.id().clone()])
);
// Longer than necessary
assert_eq!(
resolve_prefix("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
PrefixResolution::SingleMatch(vec![commit_1.id().clone()])
);
// No match
assert_eq!(resolve_prefix("ba"), PrefixResolution::NoMatch);
// Test with a revset containing only some of the commits. We should get shorter
// prefixes and be able to resolve shorter prefixes.
let revset = revset_for_commits(tx.repo(), &[&commit_2, &commit_3]);
let change_id_index = revset.change_id_index();
let prefix_len =
|commit: &Commit| change_id_index.shortest_unique_prefix_len(commit.change_id());
assert_eq!(prefix_len(&commit_1), 6);
assert_eq!(prefix_len(&commit_2), 2);
assert_eq!(prefix_len(&commit_3), 2);
let resolve_prefix =
|prefix: &str| change_id_index.resolve_prefix(&HexPrefix::new(prefix).unwrap());
assert_eq!(resolve_prefix("0"), PrefixResolution::NoMatch);
assert_eq!(
resolve_prefix("aa"),
PrefixResolution::SingleMatch(vec![commit_2.id().clone()])
);
assert_eq!(
resolve_prefix("ab"),
PrefixResolution::SingleMatch(vec![commit_3.id().clone()])
);
assert_eq!(resolve_prefix("a"), PrefixResolution::AmbiguousMatch);
}