revset: add latest(candidates, count) predicate

This serves the role of limit() in Mercurial. Since revsets in JJ is
(conceptually) an unordered set, a "limit" predicate should define its
ordering criteria. That's why the added predicate is named as "latest".

Closes #1110
This commit is contained in:
Yuya Nishihara 2023-03-25 12:14:26 +09:00
parent 185549f031
commit 0532301e03
7 changed files with 183 additions and 4 deletions

View File

@ -59,6 +59,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
to the configured user. `jj describe` also gained a `--no-edit` option to
avoid opening the editor.
* Added `latest(x[, n])` revset function to select the latest `n` commits.
### Fixed bugs
* Modify/delete conflicts now include context lines

View File

@ -108,6 +108,8 @@ revsets (expressions) as arguments.
If `x` was not specified, it selects all visible heads (as if you had said
`heads(all())`).
* `roots(x)`: Commits in `x` that are not descendants of other commits in `x`.
* `latest(x[, count])`: Latest `count` commits in `x`, based on committer
timestamp. The default `count` is 1.
* `merges()`: Merge commits.
* `description(needle)`: Commits with the given string in their
description.

View File

@ -1131,7 +1131,7 @@ pub struct IndexStats {
}
#[derive(Clone, Eq, PartialEq)]
pub struct IndexEntryByPosition<'a>(IndexEntry<'a>);
pub struct IndexEntryByPosition<'a>(pub IndexEntry<'a>);
impl Ord for IndexEntryByPosition<'_> {
fn cmp(&self, other: &Self) -> Ordering {

View File

@ -13,13 +13,13 @@
// limitations under the License.
use std::cmp::{Ordering, Reverse};
use std::collections::HashSet;
use std::collections::{BinaryHeap, HashSet};
use std::iter::Peekable;
use itertools::Itertools;
use crate::backend::{ChangeId, CommitId, ObjectId};
use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition};
use crate::backend::{ChangeId, CommitId, MillisSinceEpoch, ObjectId};
use crate::default_index_store::{CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition};
use crate::default_revset_graph_iterator::RevsetGraphIterator;
use crate::index::{HexPrefix, PrefixResolution};
use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher};
@ -661,6 +661,10 @@ fn internal_evaluate<'index>(
}
Ok(revset_for_commit_ids(repo, &commit_ids))
}
RevsetExpression::Latest { candidates, count } => {
let candidate_set = internal_evaluate(repo, candidates)?;
Ok(take_latest_revset(repo, candidate_set.as_ref(), *count))
}
RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset {
candidates: internal_evaluate(repo, &RevsetExpression::All)?,
predicate: build_predicate_fn(repo, predicate),
@ -722,6 +726,51 @@ fn revset_for_commit_ids<'index>(
Box::new(EagerRevset { index_entries })
}
fn take_latest_revset<'index>(
repo: &dyn Repo,
candidate_set: &dyn InternalRevset<'index>,
count: usize,
) -> Box<dyn InternalRevset<'index> + 'index> {
if count == 0 {
return Box::new(EagerRevset::empty());
}
#[derive(Clone, Eq, Ord, PartialEq, PartialOrd)]
struct Item<'a> {
timestamp: MillisSinceEpoch,
entry: IndexEntryByPosition<'a>, // tie-breaker
}
let store = repo.store();
let make_rev_item = |entry: IndexEntry<'index>| {
let commit = store.get_commit(&entry.commit_id()).unwrap();
Reverse(Item {
timestamp: commit.committer().timestamp.timestamp.clone(),
entry: IndexEntryByPosition(entry),
})
};
// Maintain min-heap containing the latest (greatest) count items. For small
// count and large candidate set, this is probably cheaper than building vec
// and applying selection algorithm.
let mut candidate_iter = candidate_set.iter().map(make_rev_item).fuse();
let mut latest_items = BinaryHeap::from_iter(candidate_iter.by_ref().take(count));
for item in candidate_iter {
let mut earliest = latest_items.peek_mut().unwrap();
if earliest.0 < item.0 {
*earliest = item;
}
}
assert!(latest_items.len() <= count);
let mut index_entries = latest_items
.into_iter()
.map(|item| item.0.entry.0)
.collect_vec();
index_entries.sort_unstable_by_key(|b| Reverse(b.position()));
Box::new(EagerRevset { index_entries })
}
type PurePredicateFn<'index> = Box<dyn Fn(&IndexEntry<'index>) -> bool + 'index>;
impl<'index> ToPredicateFn<'index> for PurePredicateFn<'index> {

View File

@ -232,6 +232,10 @@ pub enum RevsetExpression {
Tags,
GitRefs,
GitHead,
Latest {
candidates: Rc<RevsetExpression>,
count: usize,
},
Filter(RevsetFilterPredicate),
/// Marker for subtree that should be intersected as filter.
AsFilter(Rc<RevsetExpression>),
@ -294,6 +298,13 @@ impl RevsetExpression {
Rc::new(RevsetExpression::GitHead)
}
pub fn latest(self: &Rc<RevsetExpression>, count: usize) -> Rc<RevsetExpression> {
Rc::new(RevsetExpression::Latest {
candidates: self.clone(),
count,
})
}
pub fn filter(predicate: RevsetFilterPredicate) -> Rc<RevsetExpression> {
Rc::new(RevsetExpression::Filter(predicate))
}
@ -835,6 +846,16 @@ static BUILTIN_FUNCTION_MAP: Lazy<HashMap<&'static str, RevsetFunction>> = Lazy:
expect_no_arguments(name, arguments_pair)?;
Ok(RevsetExpression::git_head())
});
map.insert("latest", |name, arguments_pair, state| {
let ([candidates_arg], [count_opt_arg]) = expect_arguments(name, arguments_pair)?;
let candidates = parse_expression_rule(candidates_arg.into_inner(), state)?;
let count = if let Some(count_arg) = count_opt_arg {
parse_function_argument_as_literal("integer", name, count_arg, state)?
} else {
1
};
Ok(candidates.latest(count))
});
map.insert("merges", |name, arguments_pair, _state| {
expect_no_arguments(name, arguments_pair)?;
Ok(RevsetExpression::filter(
@ -1139,6 +1160,12 @@ fn try_transform_expression_bottom_up(
RevsetExpression::Tags => None,
RevsetExpression::GitRefs => None,
RevsetExpression::GitHead => None,
RevsetExpression::Latest { candidates, count } => {
transform_rec(candidates, f)?.map(|candidates| RevsetExpression::Latest {
candidates,
count: *count,
})
}
RevsetExpression::Filter(_) => None,
RevsetExpression::AsFilter(candidates) => {
transform_rec(candidates, f)?.map(RevsetExpression::AsFilter)
@ -2373,6 +2400,11 @@ mod tests {
RevsetExpression::branches("".to_owned()).roots()
);
assert_eq!(
optimize(parse("latest(branches() & all(), 2)").unwrap()),
RevsetExpression::branches("".to_owned()).latest(2)
);
assert_eq!(
optimize(parse("present(author(foo) ~ bar)").unwrap()),
Rc::new(RevsetExpression::AsFilter(Rc::new(

View File

@ -1506,6 +1506,90 @@ fn test_evaluate_expression_remote_branches(use_git: bool) {
);
}
#[test_case(false ; "local backend")]
#[test_case(true ; "git backend")]
fn test_evaluate_expression_latest(use_git: bool) {
let settings = testutils::user_settings();
let test_repo = TestRepo::init(use_git);
let repo = &test_repo.repo;
let mut tx = repo.start_transaction(&settings, "test");
let mut_repo = tx.mut_repo();
let mut write_commit_with_committer_timestamp = |msec| {
let builder = create_random_commit(mut_repo, &settings);
let mut committer = builder.committer().clone();
committer.timestamp.timestamp = MillisSinceEpoch(msec);
builder.set_committer(committer).write().unwrap()
};
let commit1_t3 = write_commit_with_committer_timestamp(3);
let commit2_t2 = write_commit_with_committer_timestamp(2);
let commit3_t2 = write_commit_with_committer_timestamp(2);
let commit4_t1 = write_commit_with_committer_timestamp(1);
// Pick the latest entry by default (count = 1)
assert_eq!(
resolve_commit_ids(mut_repo, "latest(all())"),
vec![commit1_t3.id().clone()],
);
// Should not panic with count = 0 or empty set
assert_eq!(resolve_commit_ids(mut_repo, "latest(all(), 0)"), vec![]);
assert_eq!(resolve_commit_ids(mut_repo, "latest(none())"), vec![]);
assert_eq!(
resolve_commit_ids(mut_repo, "latest(all(), 1)"),
vec![commit1_t3.id().clone()],
);
// Tie-breaking: pick the later entry in position
assert_eq!(
resolve_commit_ids(mut_repo, "latest(all(), 2)"),
vec![commit3_t2.id().clone(), commit1_t3.id().clone()],
);
assert_eq!(
resolve_commit_ids(mut_repo, "latest(all(), 3)"),
vec![
commit3_t2.id().clone(),
commit2_t2.id().clone(),
commit1_t3.id().clone(),
],
);
assert_eq!(
resolve_commit_ids(mut_repo, "latest(all(), 4)"),
vec![
commit4_t1.id().clone(),
commit3_t2.id().clone(),
commit2_t2.id().clone(),
commit1_t3.id().clone(),
],
);
assert_eq!(
resolve_commit_ids(mut_repo, "latest(all(), 5)"),
vec![
commit4_t1.id().clone(),
commit3_t2.id().clone(),
commit2_t2.id().clone(),
commit1_t3.id().clone(),
mut_repo.store().root_commit_id().clone(),
],
);
// Should not panic if count is larger than the candidates size
assert_eq!(
resolve_commit_ids(mut_repo, "latest(~root, 5)"),
vec![
commit4_t1.id().clone(),
commit3_t2.id().clone(),
commit2_t2.id().clone(),
commit1_t3.id().clone(),
],
);
}
#[test_case(false ; "local backend")]
#[test_case(true ; "git backend")]
fn test_evaluate_expression_merges(use_git: bool) {

View File

@ -101,6 +101,16 @@ fn test_bad_function_call() {
= Invalid arguments to revset function "heads": Expected 0 to 1 arguments
"###);
let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "latest(a, not_an_integer)"]);
insta::assert_snapshot!(stderr, @r###"
Error: Failed to parse revset: --> 1:11
|
1 | latest(a, not_an_integer)
| ^------------^
|
= Invalid arguments to revset function "latest": Expected function argument of type integer
"###);
let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "file()"]);
insta::assert_snapshot!(stderr, @r###"
Error: Failed to parse revset: --> 1:6