dag: further reduce remote lookups calculating "definitely missing" vertexes

Summary:
See the added comments. In some cases we can avoid remote lookups. This would
help make commit/amend more offline friendly.

Reviewed By: DurhamG

Differential Revision: D30004908

fbshipit-source-id: 94fbc7934a1eb3ae1058d8c542211a885d5ad8e6
This commit is contained in:
Jun Wu 2021-08-09 17:04:10 -07:00 committed by Facebook GitHub Bot
parent ca41a68c6d
commit c971369c84
3 changed files with 134 additions and 33 deletions

View File

@ -857,7 +857,7 @@ where
let mut missing = self.missing_vertexes_confirmed_by_remote.write();
for v in unassigned {
if missing.insert(v.clone()) {
tracing::trace!(target: "dag::cache", "cached missing {:?} (ancestor missing)", &v);
tracing::trace!(target: "dag::cache", "cached missing {:?} (definitely missing)", &v);
}
}
}
@ -865,7 +865,8 @@ where
}
}
/// Calculate vertexes that are definitely not assigned according to
/// Calculate vertexes that are definitely not assigned (not in the IdMap,
/// and not in the lazy part of the IdMap) according to
/// `hint_pending_subdag`. This does not report all unassigned vertexes.
/// But the reported vertexes are guaranteed not assigned.
///
@ -874,13 +875,130 @@ where
///
/// This function visits the "roots" of "parents", and if they are not assigned,
/// then add their descendants to the "unassigned" result set.
async fn calculate_definitely_unassigned_vertexes(
this: &dyn IdConvert,
async fn calculate_definitely_unassigned_vertexes<IS, M, P, S>(
this: &AbstractNameDag<IdDag<IS>, M, P, S>,
parents: &dyn Parents,
heads: &[VertexName],
) -> Result<Vec<VertexName>> {
) -> Result<Vec<VertexName>>
where
IS: IdDagStore,
IdDag<IS>: TryClone,
M: TryClone + IdMapAssignHead + Send + Sync + 'static,
P: TryClone + Send + Sync + 'static,
S: TryClone + Send + Sync + 'static,
{
// subdag: vertexes to insert
//
// For example, when adding C---D to the graph A---B:
//
// A---B
// \
// C---D
//
// The subdag is C---D (C does not have parent).
//
// Extra checks are needed because upon reload, the main graph
// A---B might already contain part of the subdag to be added.
let subdag = parents.hint_subdag_for_insertion(heads).await?;
let mut remaining = subdag.all().await?;
let mut unassigned = NameSet::empty();
// For lazy graph, avoid some remote lookups by figuring out
// some definitely unassigned (missing) vertexes. For example,
//
// A---B---C
// \
// D---E
//
// When adding D---E (subdag, new vertex that might trigger remote
// lookup) with parent B to the main graph (A--B--C),
// 1. If B exists, and is not in the master group, then B and its
// descendants cannot be not lazy, and there is no need to lookup
// D remotely.
// 2. If B exists, and is in the master group, and all its children
// except D (i.e. C) are known locally, and the vertex name of D
// does not match other children (C), we know that D cannot be
// in the lazy part of the main graph, and can skip the remote
// lookup.
let mut unassigned_roots = Vec::new();
if this.is_vertex_lazy() {
let roots = subdag.roots(remaining.clone()).await?;
let mut roots_iter = roots.iter().await?;
while let Some(root) = roots_iter.next().await {
let root = root?;
// Do a local "contains" check.
if matches!(
&this.contains_vertex_name_locally(&[root.clone()]).await?[..],
[true]
) {
tracing::debug!(target: "dag::definitelymissing", "root {:?} is already known", &root);
continue;
}
let root_parents_id_set = {
let root_parents = parents.parent_names(root.clone()).await?;
let root_parents_set = match this
.sort(&NameSet::from_static_names(root_parents))
.await
{
Ok(set) => set,
Err(_) => {
tracing::trace!(target: "dag::definitelymissing", "root {:?} is unclear (parents cannot be resolved)", &root);
continue;
}
};
this.to_id_set(&root_parents_set).await?
};
// If there are no parents of `root`, we cannot confidently test
// whether `root` is missing or not.
if root_parents_id_set.is_empty() {
tracing::trace!(target: "dag::definitelymissing", "root {:?} is unclear (no parents)", &root);
continue;
}
// All parents of `root` are non-lazy.
// So `root` is non-lazy and the local "contains" check is the same
// as a remote "contains" check.
if root_parents_id_set
.iter()
.all(|i| i.group() == Group::NON_MASTER)
{
tracing::debug!(target: "dag::definitelymissing", "root {:?} is not assigned (non-lazy parent)", &root);
unassigned_roots.push(root);
continue;
}
// All children of lazy parents of `root` are known locally.
// So `root` cannot match an existing vertex in the lazy graph.
let children_ids: Vec<Id> = this.dag.children(root_parents_id_set)?.iter().collect();
if this
.map
.contains_vertex_id_locally(&children_ids)
.await?
.iter()
.all(|b| *b)
{
tracing::debug!(target: "dag::definitelymissing", "root {:?} is not assigned (children of parents are known)", &root);
unassigned_roots.push(root);
continue;
}
tracing::trace!(target: "dag::definitelymissing", "root {:?} is unclear", &root);
}
if !unassigned_roots.is_empty() {
unassigned = subdag
.descendants(NameSet::from_static_names(unassigned_roots))
.await?;
remaining = remaining.difference(&unassigned);
}
}
// Figure out unassigned (missing) vertexes that do need to be inserted.
//
// remaining: vertexes to query.
// unassigned: vertexes known unassigned.
// assigned: vertexes known assigned.
@ -892,9 +1010,6 @@ async fn calculate_definitely_unassigned_vertexes(
// - Include descendants(subset_unassigned) in "unassigned".
// - Exclude "assigned" and "unassigned" from "remaining".
let mut remaining = subdag.all().await?;
let mut unassigned = NameSet::empty();
for i in 1usize.. {
let remaining_old_len = remaining.count().await?;
if remaining_old_len == 0 {
@ -943,8 +1058,9 @@ async fn calculate_definitely_unassigned_vertexes(
unassigned = unassigned.union(&subdag.descendants(new_unassigned).await?);
let unassigned_new_len = unassigned.count().await?;
tracing::debug!(
"calculate_unassigned: #{} remaining {} => {}, unassigned: {} => {}",
tracing::trace!(
target: "dag::definitelymissing",
"#{} remaining {} => {}, unassigned: {} => {}",
i,
remaining_old_len,
remaining_new_len,
@ -952,6 +1068,7 @@ async fn calculate_definitely_unassigned_vertexes(
unassigned_new_len
);
}
tracing::debug!(target: "dag::definitelymissing", "unassigned (missing): {:?}", &unassigned);
let unassigned = unassigned.iter().await?.try_collect().await?;
Ok(unassigned)

View File

@ -141,7 +141,7 @@ async fn test_add_heads() {
);
client.dag.flush(&["G".into()]).await.unwrap();
assert_eq!(client.output(), ["resolve names: [I, C], heads: [B]"]);
assert_eq!(client.output(), ["resolve names: [I], heads: [B]"]);
let mut client = server.client_cloned_data().await;
client

View File

@ -194,31 +194,15 @@ Read file content:
TRACE eagerepo::api: found: a2e456504a5e61f763f1a0b36a6c247c7541b2b3, 41 bytes
C (no-eol)
Making a commit and amend:
(Triggers remote lookup 1 time!)
#if no-windows
(Path separator is different on Windows)
Make a commit on tip, and amend. They do not trigger remote lookups:
$ echo Z > Z
$ LOG=dag::protocol=debug,dag::open=debug,dag::cache=trace hg commit -Am Z Z
DEBUG dag::open: open at "$TESTTMP/e1/.hg/store/segments/v1"
DEBUG dag::open: open at "$TESTTMP/cloned/.hg/store/segments/v1"
DEBUG dag::open: open at "$TESTTMP/e1/.hg/store/segments/v1"
DEBUG dag::protocol: resolve names [567c5fc544ed12bf9619197fdd5263d6c3129cd0] remotely
TRACE dag::cache: cached missing 567c5fc544ed12bf9619197fdd5263d6c3129cd0 (server confirmed)
DEBUG dag::open: open at "$TESTTMP/cloned/.hg/store/segments/v1"
$ LOG=warn hg up -q tip
$ LOG=dag::protocol=debug,dag::cache=trace hg commit -Am Z Z
TRACE dag::cache: cached missing ae226a63078b2a472fa38ec61318bb37e8c10bfb (definitely missing)
DEBUG dag::cache: reusing cache (1 missing)
$ LOG=dag::protocol=debug,dag::open=debug,dag::cache=trace hg amend -m Z1
DEBUG dag::open: open at "$TESTTMP/e1/.hg/store/segments/v1"
DEBUG dag::open: open at "$TESTTMP/cloned/.hg/store/segments/v1"
DEBUG dag::open: open at "$TESTTMP/e1/.hg/store/segments/v1"
DEBUG dag::protocol: resolve names [26ef60562bd4f4205f24250ea9d2e24e61108072] remotely
TRACE dag::cache: cached missing 26ef60562bd4f4205f24250ea9d2e24e61108072 (server confirmed)
DEBUG dag::open: open at "$TESTTMP/cloned/.hg/store/segments/v1"
$ LOG=dag::protocol=debug,dag::cache=trace hg amend -m Z1
TRACE dag::cache: cached missing 893a1eb784b46325fb3062573ba15a22780ebe4a (definitely missing)
DEBUG dag::cache: reusing cache (1 missing)
DEBUG dag::open: open at "$TESTTMP/cloned/.hg/store/segments/v1"
DEBUG dag::cache: reusing cache (1 missing)
#endif