Log performance stats during submodule expansion

Summary:
It will be very useful to know how much time we're spending on each part of the submodule expansion and validation flow.

It will help determine if and where we need to work on improving performance, because the inital import can run for a long time, but the same code will be used by live forward and backsyncer, so it needs to be reasonably fast.

Reviewed By: andreacampi

Differential Revision: D56561379

fbshipit-source-id: 26544914299e2ef022d472a0269920fe26f5b1f4
This commit is contained in:
Gustavo Galvao Avena 2024-04-29 03:41:49 -07:00 committed by Facebook GitHub Bot
parent 6f1834ac61
commit 31728b14ac
5 changed files with 107 additions and 43 deletions

View File

@ -42,6 +42,7 @@ filenodes = { version = "0.1.0", path = "../../filenodes" }
filestore = { version = "0.1.0", path = "../../filestore" }
fsnodes = { version = "0.1.0", path = "../../derived_data/fsnodes" }
futures = { version = "0.3.30", features = ["async-await", "compat"] }
futures_stats = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
git_types = { version = "0.1.0", path = "../../git/git_types" }
itertools = "0.11.0"
justknobs = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }

View File

@ -43,6 +43,7 @@ rust_library(
"//common/rust/shed/cloned:cloned",
"//common/rust/shed/facet:facet",
"//common/rust/shed/fbinit:fbinit",
"//common/rust/shed/futures_stats:futures_stats",
"//common/rust/shed/justknobs_stub:justknobs",
"//common/rust/shed/sorted_vector_map:sorted_vector_map",
"//common/rust/shed/sql:sql",

View File

@ -51,6 +51,7 @@ use crate::git_submodules::utils::get_x_repo_submodule_metadata_file_path;
use crate::git_submodules::utils::is_path_git_submodule;
use crate::git_submodules::utils::list_all_paths;
use crate::git_submodules::utils::list_non_submodule_files_under;
use crate::git_submodules::utils::run_and_log_stats_to_scuba;
use crate::git_submodules::utils::submodule_diff;
use crate::git_submodules::validation::validate_all_submodule_expansions;
use crate::types::Large;
@ -101,19 +102,28 @@ pub async fn expand_and_validate_all_git_submodule_file_changes<'a, R: Repo>(
"Can't sync changes from large to small repo if small repo has submodule expansion enabled"
);
let new_bonsai =
expand_all_git_submodule_file_changes(ctx, bonsai, small_repo, sm_exp_data.clone())
.await
.context("Failed to expand submodule file changes from bonsai")?;
let rewritten_bonsai = rewrite_commit(
let new_bonsai = run_and_log_stats_to_scuba(
ctx,
new_bonsai,
remapped_parents,
mover_to_multi_mover(mover.clone()),
small_repo,
"Expanding all git submodule file changes",
None,
rewrite_opts,
expand_all_git_submodule_file_changes(ctx, bonsai, small_repo, sm_exp_data.clone()),
)
.await
.context("Failed to expand submodule file changes from bonsai")?;
let rewritten_bonsai = run_and_log_stats_to_scuba(
ctx,
"Rewriting commit",
None,
rewrite_commit(
ctx,
new_bonsai,
remapped_parents,
mover_to_multi_mover(mover.clone()),
small_repo,
None,
rewrite_opts,
),
)
.await
.context("Failed to create bonsai to be synced")?
@ -121,8 +131,15 @@ pub async fn expand_and_validate_all_git_submodule_file_changes<'a, R: Repo>(
let rewritten_bonsai = rewritten_bonsai.freeze()?;
let validated_bonsai =
validate_all_submodule_expansions(ctx, sm_exp_data, rewritten_bonsai, mover).await?;
let validated_bonsai = run_and_log_stats_to_scuba(
ctx,
"Validating all submodule expansions",
None,
validate_all_submodule_expansions(ctx, sm_exp_data, rewritten_bonsai, mover),
)
.await
// TODO(gustavoavena): print some identifier of changeset that failed
.context("Validation of submodule expansion failed")?;
Ok(validated_bonsai.into_mut())
}
@ -146,13 +163,18 @@ async fn expand_all_git_submodule_file_changes<'a, R: Repo>(
match &fc {
FileChange::Change(tfc) => match &tfc.file_type() {
FileType::GitSubmodule => {
expand_git_submodule_file_change(
run_and_log_stats_to_scuba(
ctx,
small_repo,
sm_exp_data.clone(),
parents,
p,
tfc.content_id(),
"Expand git submodule file change",
format!("submodule_file_path: {p}"),
expand_git_submodule_file_change(
ctx,
small_repo,
sm_exp_data.clone(),
parents,
p,
tfc.content_id(),
),
)
.await
}
@ -704,12 +726,11 @@ async fn handle_submodule_deletion<'a, R: Repo>(
}
// This is a submodule file, so delete its entire expanded directory.
return delete_submodule_expansion(
return run_and_log_stats_to_scuba(
ctx,
small_repo,
sm_exp_data,
parents,
submodule_file_path,
"Deleting submodule expansion",
format!("Submodule path: {submodule_file_path}"),
delete_submodule_expansion(ctx, small_repo, sm_exp_data, parents, submodule_file_path),
)
.await;
};

View File

@ -20,6 +20,8 @@ use futures::stream;
use futures::stream::Stream;
use futures::stream::StreamExt;
use futures::stream::TryStreamExt;
use futures::Future;
use futures_stats::TimedFutureExt;
use git_types::ObjectKind;
use manifest::bonsai_diff;
use manifest::BonsaiDiffFileChange;
@ -271,3 +273,19 @@ pub(crate) async fn root_fsnode_id_from_submodule_git_commit(
Ok(submodule_root_fsnode_id.into_fsnode_id())
}
pub async fn run_and_log_stats_to_scuba<R, S>(
ctx: &CoreContext,
log_tag: &str,
msg: S,
fut: impl Future<Output = R>,
) -> R
where
S: Into<Option<String>>,
{
let (stats, result) = fut.timed().await;
let mut scuba = ctx.scuba().clone();
scuba.add_future_stats(&stats);
scuba.log_with_msg(log_tag, msg);
result
}

View File

@ -47,6 +47,7 @@ use crate::git_submodules::utils::get_x_repo_submodule_metadata_file_path;
use crate::git_submodules::utils::git_hash_from_submodule_metadata_file;
use crate::git_submodules::utils::list_non_submodule_files_under;
use crate::git_submodules::utils::root_fsnode_id_from_submodule_git_commit;
use crate::git_submodules::utils::run_and_log_stats_to_scuba;
use crate::git_submodules::utils::x_repo_submodule_metadata_file_basename;
use crate::types::Repo;
@ -71,13 +72,18 @@ pub async fn validate_all_submodule_expansions<'a, R: Repo>(
.try_fold(bonsai, |bonsai, (submodule_path, submodule_repo)| {
cloned!(mover, sm_exp_data);
async move {
validate_submodule_expansion(
run_and_log_stats_to_scuba(
ctx,
sm_exp_data,
bonsai,
submodule_path,
submodule_repo,
mover,
"Validating submodule expansion",
format!("Submodule path: {submodule_path}"),
validate_submodule_expansion(
ctx,
sm_exp_data,
bonsai,
submodule_path,
submodule_repo,
mover,
),
)
.await
}
@ -234,13 +240,21 @@ async fn validate_submodule_expansion<'a, R: Repo>(
// The submodule roots fsnode and the fsnode from its expansion in the large
// repo should be exactly the same.
validate_working_copy_of_expansion_with_recursive_submodules(
run_and_log_stats_to_scuba(
ctx,
sm_exp_data,
adjusted_submodule_deps,
submodule_repo,
expansion_fsnode_id,
submodule_fsnode_id,
"Validate working copy of submodule expansion with recursive submodules",
format!(
"Recursive submodule: {}",
submodule_repo.repo_identity().name()
),
validate_working_copy_of_expansion_with_recursive_submodules(
ctx,
sm_exp_data,
adjusted_submodule_deps,
submodule_repo,
expansion_fsnode_id,
submodule_fsnode_id,
),
)
.await?;
@ -536,14 +550,23 @@ async fn validate_working_copy_of_expansion_with_recursive_submodules<'a, R: Rep
borrowed!(submodule_repo);
async move {
validate_expansion_directory_against_submodule_manifest_entry(
run_and_log_stats_to_scuba(
ctx,
sm_exp_data,
submodule_repo,
adjusted_submodule_deps,
iteration_data,
exp_path,
exp_directory,
"Validate expansion directory against submodule manifest entry",
format!(
"submodule_repo: {} / exp_path: {}",
submodule_repo.repo_identity().name(),
exp_path
),
validate_expansion_directory_against_submodule_manifest_entry(
ctx,
sm_exp_data,
submodule_repo,
adjusted_submodule_deps,
iteration_data,
exp_path,
exp_directory,
),
)
.await
}