From 31728b14ac9ee2c47243b2c28ef4876be301a59e Mon Sep 17 00:00:00 2001 From: Gustavo Galvao Avena Date: Mon, 29 Apr 2024 03:41:49 -0700 Subject: [PATCH] Log performance stats during submodule expansion Summary: It will be very useful to know how much time we're spending on each part of the submodule expansion and validation flow. It will help determine if and where we need to work on improving performance, because the inital import can run for a long time, but the same code will be used by live forward and backsyncer, so it needs to be reasonably fast. Reviewed By: andreacampi Differential Revision: D56561379 fbshipit-source-id: 26544914299e2ef022d472a0269920fe26f5b1f4 --- .../cross_repo_sync/Cargo.toml | 1 + .../commit_rewriting/cross_repo_sync/TARGETS | 1 + .../src/git_submodules/expand.rs | 69 ++++++++++++------- .../src/git_submodules/utils.rs | 18 +++++ .../src/git_submodules/validation.rs | 61 +++++++++++----- 5 files changed, 107 insertions(+), 43 deletions(-) diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/Cargo.toml b/eden/mononoke/commit_rewriting/cross_repo_sync/Cargo.toml index 965c0200a0..b44602b57d 100644 --- a/eden/mononoke/commit_rewriting/cross_repo_sync/Cargo.toml +++ b/eden/mononoke/commit_rewriting/cross_repo_sync/Cargo.toml @@ -42,6 +42,7 @@ filenodes = { version = "0.1.0", path = "../../filenodes" } filestore = { version = "0.1.0", path = "../../filestore" } fsnodes = { version = "0.1.0", path = "../../derived_data/fsnodes" } futures = { version = "0.3.30", features = ["async-await", "compat"] } +futures_stats = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" } git_types = { version = "0.1.0", path = "../../git/git_types" } itertools = "0.11.0" justknobs = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" } diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/TARGETS b/eden/mononoke/commit_rewriting/cross_repo_sync/TARGETS index 03b906bcfc..de8c7061c3 100644 --- a/eden/mononoke/commit_rewriting/cross_repo_sync/TARGETS +++ b/eden/mononoke/commit_rewriting/cross_repo_sync/TARGETS @@ -43,6 +43,7 @@ rust_library( "//common/rust/shed/cloned:cloned", "//common/rust/shed/facet:facet", "//common/rust/shed/fbinit:fbinit", + "//common/rust/shed/futures_stats:futures_stats", "//common/rust/shed/justknobs_stub:justknobs", "//common/rust/shed/sorted_vector_map:sorted_vector_map", "//common/rust/shed/sql:sql", diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/expand.rs b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/expand.rs index 661f3110fc..76a745bc5a 100644 --- a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/expand.rs +++ b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/expand.rs @@ -51,6 +51,7 @@ use crate::git_submodules::utils::get_x_repo_submodule_metadata_file_path; use crate::git_submodules::utils::is_path_git_submodule; use crate::git_submodules::utils::list_all_paths; use crate::git_submodules::utils::list_non_submodule_files_under; +use crate::git_submodules::utils::run_and_log_stats_to_scuba; use crate::git_submodules::utils::submodule_diff; use crate::git_submodules::validation::validate_all_submodule_expansions; use crate::types::Large; @@ -101,19 +102,28 @@ pub async fn expand_and_validate_all_git_submodule_file_changes<'a, R: Repo>( "Can't sync changes from large to small repo if small repo has submodule expansion enabled" ); - let new_bonsai = - expand_all_git_submodule_file_changes(ctx, bonsai, small_repo, sm_exp_data.clone()) - .await - .context("Failed to expand submodule file changes from bonsai")?; - - let rewritten_bonsai = rewrite_commit( + let new_bonsai = run_and_log_stats_to_scuba( ctx, - new_bonsai, - remapped_parents, - mover_to_multi_mover(mover.clone()), - small_repo, + "Expanding all git submodule file changes", None, - rewrite_opts, + expand_all_git_submodule_file_changes(ctx, bonsai, small_repo, sm_exp_data.clone()), + ) + .await + .context("Failed to expand submodule file changes from bonsai")?; + + let rewritten_bonsai = run_and_log_stats_to_scuba( + ctx, + "Rewriting commit", + None, + rewrite_commit( + ctx, + new_bonsai, + remapped_parents, + mover_to_multi_mover(mover.clone()), + small_repo, + None, + rewrite_opts, + ), ) .await .context("Failed to create bonsai to be synced")? @@ -121,8 +131,15 @@ pub async fn expand_and_validate_all_git_submodule_file_changes<'a, R: Repo>( let rewritten_bonsai = rewritten_bonsai.freeze()?; - let validated_bonsai = - validate_all_submodule_expansions(ctx, sm_exp_data, rewritten_bonsai, mover).await?; + let validated_bonsai = run_and_log_stats_to_scuba( + ctx, + "Validating all submodule expansions", + None, + validate_all_submodule_expansions(ctx, sm_exp_data, rewritten_bonsai, mover), + ) + .await + // TODO(gustavoavena): print some identifier of changeset that failed + .context("Validation of submodule expansion failed")?; Ok(validated_bonsai.into_mut()) } @@ -146,13 +163,18 @@ async fn expand_all_git_submodule_file_changes<'a, R: Repo>( match &fc { FileChange::Change(tfc) => match &tfc.file_type() { FileType::GitSubmodule => { - expand_git_submodule_file_change( + run_and_log_stats_to_scuba( ctx, - small_repo, - sm_exp_data.clone(), - parents, - p, - tfc.content_id(), + "Expand git submodule file change", + format!("submodule_file_path: {p}"), + expand_git_submodule_file_change( + ctx, + small_repo, + sm_exp_data.clone(), + parents, + p, + tfc.content_id(), + ), ) .await } @@ -704,12 +726,11 @@ async fn handle_submodule_deletion<'a, R: Repo>( } // This is a submodule file, so delete its entire expanded directory. - return delete_submodule_expansion( + return run_and_log_stats_to_scuba( ctx, - small_repo, - sm_exp_data, - parents, - submodule_file_path, + "Deleting submodule expansion", + format!("Submodule path: {submodule_file_path}"), + delete_submodule_expansion(ctx, small_repo, sm_exp_data, parents, submodule_file_path), ) .await; }; diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/utils.rs b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/utils.rs index 608df6fc0f..7800692efd 100644 --- a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/utils.rs +++ b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/utils.rs @@ -20,6 +20,8 @@ use futures::stream; use futures::stream::Stream; use futures::stream::StreamExt; use futures::stream::TryStreamExt; +use futures::Future; +use futures_stats::TimedFutureExt; use git_types::ObjectKind; use manifest::bonsai_diff; use manifest::BonsaiDiffFileChange; @@ -271,3 +273,19 @@ pub(crate) async fn root_fsnode_id_from_submodule_git_commit( Ok(submodule_root_fsnode_id.into_fsnode_id()) } + +pub async fn run_and_log_stats_to_scuba( + ctx: &CoreContext, + log_tag: &str, + msg: S, + fut: impl Future, +) -> R +where + S: Into>, +{ + let (stats, result) = fut.timed().await; + let mut scuba = ctx.scuba().clone(); + scuba.add_future_stats(&stats); + scuba.log_with_msg(log_tag, msg); + result +} diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/validation.rs b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/validation.rs index 505f58ca8d..61163c32eb 100644 --- a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/validation.rs +++ b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/validation.rs @@ -47,6 +47,7 @@ use crate::git_submodules::utils::get_x_repo_submodule_metadata_file_path; use crate::git_submodules::utils::git_hash_from_submodule_metadata_file; use crate::git_submodules::utils::list_non_submodule_files_under; use crate::git_submodules::utils::root_fsnode_id_from_submodule_git_commit; +use crate::git_submodules::utils::run_and_log_stats_to_scuba; use crate::git_submodules::utils::x_repo_submodule_metadata_file_basename; use crate::types::Repo; @@ -71,13 +72,18 @@ pub async fn validate_all_submodule_expansions<'a, R: Repo>( .try_fold(bonsai, |bonsai, (submodule_path, submodule_repo)| { cloned!(mover, sm_exp_data); async move { - validate_submodule_expansion( + run_and_log_stats_to_scuba( ctx, - sm_exp_data, - bonsai, - submodule_path, - submodule_repo, - mover, + "Validating submodule expansion", + format!("Submodule path: {submodule_path}"), + validate_submodule_expansion( + ctx, + sm_exp_data, + bonsai, + submodule_path, + submodule_repo, + mover, + ), ) .await } @@ -234,13 +240,21 @@ async fn validate_submodule_expansion<'a, R: Repo>( // The submodule roots fsnode and the fsnode from its expansion in the large // repo should be exactly the same. - validate_working_copy_of_expansion_with_recursive_submodules( + run_and_log_stats_to_scuba( ctx, - sm_exp_data, - adjusted_submodule_deps, - submodule_repo, - expansion_fsnode_id, - submodule_fsnode_id, + "Validate working copy of submodule expansion with recursive submodules", + format!( + "Recursive submodule: {}", + submodule_repo.repo_identity().name() + ), + validate_working_copy_of_expansion_with_recursive_submodules( + ctx, + sm_exp_data, + adjusted_submodule_deps, + submodule_repo, + expansion_fsnode_id, + submodule_fsnode_id, + ), ) .await?; @@ -536,14 +550,23 @@ async fn validate_working_copy_of_expansion_with_recursive_submodules<'a, R: Rep borrowed!(submodule_repo); async move { - validate_expansion_directory_against_submodule_manifest_entry( + run_and_log_stats_to_scuba( ctx, - sm_exp_data, - submodule_repo, - adjusted_submodule_deps, - iteration_data, - exp_path, - exp_directory, + "Validate expansion directory against submodule manifest entry", + format!( + "submodule_repo: {} / exp_path: {}", + submodule_repo.repo_identity().name(), + exp_path + ), + validate_expansion_directory_against_submodule_manifest_entry( + ctx, + sm_exp_data, + submodule_repo, + adjusted_submodule_deps, + iteration_data, + exp_path, + exp_directory, + ), ) .await }