mononoke: create a stack of merge commits

Summary:
Previously the add sync target method created a single merge commit, which
meant we might create a bonsai commit with hundreds of parents. This is not
ideal because Mercurial can only work correctly with at most 2 parents; for a
bonsai changeset with 3 or more parents, Mercurial file histories might be
lost.

So instead of creating a single giant merge commit, let's create a stack of
merge commits.
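To make the idea concrete, here is a minimal, self-contained sketch of the
folding shape this commit implements. `CsId` and `merge_two` are hypothetical
stand-ins for Mononoke's ChangesetId and merge-commit creation, not the real
API; only the 2-parent folding structure matters.

// Sketch: fold N moved commits into a chain of 2-parent merges, so no
// commit in the stack ever has more than two parents.
#[derive(Debug, Clone, Copy)]
struct CsId(u64);

// Stand-in for creating a merge commit with parents [left, right] and
// returning the id of the newly created commit.
fn merge_two(left: CsId, right: CsId) -> CsId {
    CsId(left.0.wrapping_mul(31).wrapping_add(right.0))
}

fn merge_stack(moved: &[CsId]) -> Option<CsId> {
    let (first, rest) = moved.split_first()?;
    // Each step merges the running result with one more moved commit.
    Some(rest.iter().fold(*first, |acc, next| merge_two(acc, *next)))
}

fn main() {
    // Four moved commits produce a stack of three 2-parent merges instead
    // of one merge commit with four parents.
    let moved = [CsId(1), CsId(2), CsId(3), CsId(4)];
    println!("{:?}", merge_stack(&moved));
}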

Reviewed By: mitrandir77

Differential Revision: D28792581

fbshipit-source-id: 2f8ff6b49db29c4692b7385f1d1ab57986075d57
Stanislau Hlebik, 2021-06-02 07:58:49 -07:00 (committed by Facebook GitHub Bot)
parent 2c63981029
commit 631d21ec95
2 changed files with 173 additions and 16 deletions

First changed file:

@@ -16,7 +16,9 @@ use derived_data::BonsaiDerived;
 use fsnodes::RootFsnodeId;
 use futures::{stream, StreamExt, TryStreamExt};
 use manifest::ManifestOps;
-use megarepo_config::{MononokeMegarepoConfigs, Source, SourceRevision, SyncTargetConfig};
+use megarepo_config::{
+    MononokeMegarepoConfigs, Source, SourceRevision, SyncConfigVersion, SyncTargetConfig,
+};
 use megarepo_error::MegarepoError;
 use megarepo_mapping::CommitRemappingState;
 use mononoke_api::Mononoke;
@@ -230,6 +232,19 @@ impl<'a> AddSyncTarget<'a> {
         Ok(*changeset_id)
     }
 
+    // Merge moved commits from a lot of sources together.
+    // Instead of creating a single merge commit with lots of parents,
+    // we create a stack of merge commits (the primary reason for that is
+    // that mercurial doesn't support more than 2 parents).
+    //
+    //       Merge_n
+    //      /       \
+    //  Merge_n-1   Move_n
+    //     |   \
+    //     |    Move_n-1
+    //  Merge_n-2
+    //     |   \
+    //     |    Move_n-2
     async fn create_merge_commits(
         &self,
         ctx: &CoreContext,
@@ -237,7 +252,7 @@ impl<'a> AddSyncTarget<'a> {
         moved_commits: Vec<(SourceName, SourceAndMovedChangesets)>,
         sync_target_config: &SyncTargetConfig,
         message: Option<String>,
-    ) -> Result<ChangesetId, Error> {
+    ) -> Result<ChangesetId, MegarepoError> {
         // Now let's create a merge commit that merges all moved changesets
 
         // We need to create a file with the latest commits that were synced from
@@ -251,28 +266,64 @@ impl<'a> AddSyncTarget<'a> {
             sync_target_config.version.clone(),
         );
 
-        // TODO(stash): avoid doing a single merge commit, and do a stack of merges instead
-        let mut bcs = BonsaiChangesetMut {
-            parents: moved_commits
-                .into_iter()
-                .map(|(_, css)| css.moved.get_changeset_id())
-                .collect(),
+        let (last_moved_commit, first_moved_commits) = match moved_commits.split_last() {
+            Some((last_moved_commit, first_moved_commits)) => {
+                (last_moved_commit, first_moved_commits)
+            }
+            None => {
+                return Err(MegarepoError::request(anyhow!(
+                    "no move commits were set - target has no sources?"
+                )));
+            }
+        };
+
+        let mut merges = vec![];
+        let mut cur_parents = vec![];
+        for (_, css) in first_moved_commits {
+            cur_parents.push(css.moved.get_changeset_id());
+            if cur_parents.len() > 1 {
+                let bcs = self.create_merge_commit(
+                    message.clone(),
+                    cur_parents,
+                    sync_target_config.version.clone(),
+                )?;
+                let merge = bcs.freeze()?;
+                cur_parents = vec![merge.get_changeset_id()];
+                merges.push(merge);
+            }
+        }
+
+        cur_parents.push(last_moved_commit.1.moved.get_changeset_id());
+        let mut final_merge =
+            self.create_merge_commit(message, cur_parents, sync_target_config.version.clone())?;
+        state.save_in_changeset(ctx, repo, &mut final_merge).await?;
+        let final_merge = final_merge.freeze()?;
+        merges.push(final_merge.clone());
+        save_bonsai_changesets(merges, ctx.clone(), repo.clone()).await?;
+
+        Ok(final_merge.get_changeset_id())
+    }
+
+    fn create_merge_commit(
+        &self,
+        message: Option<String>,
+        parents: Vec<ChangesetId>,
+        version: SyncConfigVersion,
+    ) -> Result<BonsaiChangesetMut, Error> {
+        // TODO(stash, mateusz, simonfar): figure out what fields
+        // we need to set here
+        let bcs = BonsaiChangesetMut {
+            parents,
             author: "svcscm".to_string(),
             author_date: DateTime::now(),
             committer: None,
             committer_date: None,
-            message: message.unwrap_or(format!(
-                "Add new sync target with version {}",
-                sync_target_config.version
-            )),
+            message: message.unwrap_or(format!("Add new sync target with version {}", version)),
             extra: SortedVectorMap::new(),
             file_changes: SortedVectorMap::new(),
         };
-        state.save_in_changeset(ctx, repo, &mut bcs).await?;
-        let bcs = bcs.freeze()?;
-        save_bonsai_changesets(vec![bcs.clone()], ctx.clone(), repo.clone()).await?;
 
-        Ok(bcs.get_changeset_id())
+        Ok(bcs)
     }
 
     async fn create_single_move_commit<'b>(
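To trace the loop above for three moved commits (Move_0 through Move_2 are
illustrative names, not identifiers from the patch):

    iteration 1: cur_parents = [Move_0]             (len == 1, no merge yet)
    iteration 2: cur_parents = [Move_0, Move_1]  -> Merge_1 created;
                 cur_parents = [Merge_1]
    after loop:  cur_parents = [Merge_1, Move_2] -> final merge created

Note that only the final merge gets the commit remapping state file
(state.save_in_changeset is called on final_merge alone), and the whole stack
is persisted with a single save_bonsai_changesets call rather than one write
per merge.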

Second changed file:

@@ -10,6 +10,7 @@ use crate::common::SourceName;
 use crate::megarepo_test_utils::{MegarepoTest, SyncTargetConfigBuilder};
 use crate::sync_changeset::SyncChangeset;
 use anyhow::Error;
+use blobstore::Loadable;
 use context::CoreContext;
 use fbinit::FacebookInit;
 use maplit::hashmap;
@@ -555,3 +556,108 @@ async fn test_add_sync_target_invalid_hash_to_merge(fb: FacebookInit) -> Result<
     Ok(())
 }
+
+#[fbinit::test]
+async fn test_add_sync_target_merge_three_sources(fb: FacebookInit) -> Result<(), Error> {
+    let ctx = CoreContext::test_mock(fb);
+    let mut test = MegarepoTest::new(&ctx).await?;
+    let target: Target = test.target("target".to_string());
+
+    let first_source_name = "source_1".to_string();
+    let second_source_name = "source_2".to_string();
+    let third_source_name = "source_3".to_string();
+    let version = "version_1".to_string();
+    SyncTargetConfigBuilder::new(test.repo_id(), target.clone(), version.clone())
+        .source_builder(first_source_name.clone())
+        .set_prefix_bookmark_to_source_name()
+        .build_source()?
+        .source_builder(second_source_name.clone())
+        .set_prefix_bookmark_to_source_name()
+        .build_source()?
+        .source_builder(third_source_name.clone())
+        .set_prefix_bookmark_to_source_name()
+        .build_source()?
+        .build(&mut test.configs_storage);
+
+    println!("Create initial source commits and bookmarks");
+    let first_source_cs_id = CreateCommitContext::new_root(&ctx, &test.blobrepo)
+        .add_file("first", "first")
+        .commit()
+        .await?;
+
+    bookmark(&ctx, &test.blobrepo, first_source_name.clone())
+        .set_to(first_source_cs_id)
+        .await?;
+
+    let second_source_cs_id = CreateCommitContext::new_root(&ctx, &test.blobrepo)
+        .add_file("second", "second")
+        .commit()
+        .await?;
+
+    bookmark(&ctx, &test.blobrepo, second_source_name.clone())
+        .set_to(second_source_cs_id)
+        .await?;
+
+    let third_source_cs_id = CreateCommitContext::new_root(&ctx, &test.blobrepo)
+        .add_file("third", "third")
+        .commit()
+        .await?;
+
+    bookmark(&ctx, &test.blobrepo, third_source_name.clone())
+        .set_to(third_source_cs_id)
+        .await?;
+
+    let configs_storage: Arc<dyn MononokeMegarepoConfigs> = Arc::new(test.configs_storage.clone());
+
+    let sync_target_config =
+        test.configs_storage
+            .get_config_by_version(ctx.clone(), target, version.clone())?;
+    let add_sync_target = AddSyncTarget::new(&configs_storage, &test.mononoke);
+    add_sync_target
+        .run(
+            &ctx,
+            sync_target_config.clone(),
+            hashmap! {
+                SourceName(first_source_name.clone()) => first_source_cs_id,
+                SourceName(second_source_name.clone()) => second_source_cs_id,
+                SourceName(third_source_name.clone()) => third_source_cs_id,
+            },
+            None,
+        )
+        .await?;
+
+    let target_cs_id = resolve_cs_id(&ctx, &test.blobrepo, "target").await?;
+    let mut wc = list_working_copy_utf8(&ctx, &test.blobrepo, target_cs_id).await?;
+
+    // Remove file with commit remapping state because it's never present in source
+    assert!(wc.remove(&MPath::new(REMAPPING_STATE_FILE)?).is_some());
+
+    assert_eq!(
+        wc,
+        hashmap! {
+            MPath::new("source_1/first")? => "first".to_string(),
+            MPath::new("source_2/second")? => "second".to_string(),
+            MPath::new("source_3/third")? => "third".to_string(),
+        }
+    );
+
+    // Validate the shape of the graph.
+    // It should look like
+    //      o
+    //     / \
+    //    o   M
+    //       / \
+    let target_cs = target_cs_id.load(&ctx, test.blobrepo.blobstore()).await?;
+    assert!(target_cs.is_merge());
+    let parents = target_cs.parents().collect::<Vec<_>>();
+    assert_eq!(parents.len(), 2);
+    let first_merge = parents[0].load(&ctx, test.blobrepo.blobstore()).await?;
+    assert!(first_merge.is_merge());
+    let move_commit = parents[1].load(&ctx, test.blobrepo.blobstore()).await?;
+    assert!(!move_commit.is_merge());
+
+    Ok(())
+}
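The shape assertions above are specific to three sources. As a hedged sketch
(not part of the patch), the same check could walk a stack of any depth by
following first parents, reusing only calls already present in this test
(load, is_merge, parents) and relying on create_merge_commits always pushing
the previous merge as the first parent:

    // Sketch: walk the merge stack from the target commit down its first
    // parent, asserting every level is a 2-parent merge whose second
    // parent is a plain (non-merge) move commit.
    let mut cur = target_cs_id;
    loop {
        let cs = cur.load(&ctx, test.blobrepo.blobstore()).await?;
        if !cs.is_merge() {
            break; // bottom of the stack: the first moved commit
        }
        let parents = cs.parents().collect::<Vec<_>>();
        assert_eq!(parents.len(), 2);
        let right = parents[1].load(&ctx, test.blobrepo.blobstore()).await?;
        assert!(!right.is_merge(), "second parent should be a move commit");
        cur = parents[0];
    }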