mononoke: reuse hg manifest from parents if they are identical

Summary:
This is a follow-up to D28903515 (9a3fbfe311). In D28903515 (9a3fbfe311) we added support for reusing
hg filenodes if a parent has the same filenode. However, we weren't reusing
manifests even if a parent has an identical manifest, and this diff adds
support for doing so.

There's one caveat - we try to reuse parent manifests only if there is more
than one parent manifest. See the explanation in the comments.

Reviewed By: farnz

Differential Revision: D29098908

fbshipit-source-id: 5ecfdc4b022ffc7620501cc024e7a659fb82f768
This commit is contained in:
Stanislau Hlebik 2021-06-22 11:48:56 -07:00 committed by Facebook GitHub Bot
parent 70857500c7
commit 56c926297f
12 changed files with 84 additions and 35 deletions

View File

@ -24,6 +24,7 @@ futures = { version = "0.3.13", features = ["async-await", "compat"] }
manifest = { version = "0.1.0", path = "../../manifest" }
mercurial_types = { version = "0.1.0", path = "../../mercurial/types" }
mononoke_types = { version = "0.1.0", path = "../../mononoke_types" }
sorted_vector_map = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" }
stats = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" }
tokio = { version = "1.6.1", features = ["full", "test-util"] }

View File

@ -14,7 +14,7 @@ use futures::future::try_join_all;
use futures::{
channel::mpsc,
compat::Future01CompatExt,
future::{BoxFuture, FutureExt, TryFutureExt},
future::{self, BoxFuture, FutureExt, TryFutureExt},
};
use manifest::{derive_manifest_with_io_sender, Entry, LeafInfo, Traced, TreeInfo};
use mercurial_types::{
@ -25,7 +25,8 @@ use mercurial_types::{
HgFileNodeId, HgManifestId,
};
use mononoke_types::{FileType, MPath, RepoPath};
use std::{io::Write, sync::Arc};
use sorted_vector_map::SortedVectorMap;
use std::{collections::BTreeMap, io::Write, sync::Arc};
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
struct ParentIndex(usize);
@ -94,6 +95,49 @@ async fn create_hg_manifest(
parents,
} = tree_info;
// See if any of the parents have the same hg manifest. If yes, then we can just reuse it
// without recreating manifest again.
// But note that we reuse only if the manifest has more than one parent, and there are a few reasons for
// it:
// 1) If a commit has a single parent then create_hg_manifest function shouldn't normally be called -
// it can only happen if a file hasn't changed, but nevertheless there's an entry for this file
// in the bonsai. This should happen rarely, and recreating manifest in these cases shouldn't be
// a problem.
// 2) It adds an additional read of parent manifests, and it can potentially be expensive if manifests
// are large.
// We'd rather not do it if we don't need to, and it seems that we don't really need to (see point 1)
if parents.len() > 1 {
let mut subentries_vec_map = BTreeMap::new();
for (name, (_context, subentry)) in &subentries {
let subentry = match subentry {
Entry::Tree(manifest_id) => Entry::Tree(*manifest_id.untraced()),
Entry::Leaf(leaf) => Entry::Leaf(*leaf.untraced()),
};
subentries_vec_map.insert(name.clone(), subentry);
}
let subentries_vec_map = SortedVectorMap::from(subentries_vec_map);
let (p1_parent, p2_parent) = hg_parents(&parents);
let loaded_parents = {
let ctx = &ctx;
let blobstore = &blobstore;
future::try_join_all(p1_parent.into_iter().chain(p2_parent).map(|id| async move {
let mf = id.load(ctx, blobstore).map_err(Error::from).await?;
Result::<_, Error>::Ok((id, mf))
}))
.await?
};
if let Some((reuse_id, _)) = loaded_parents
.into_iter()
.find(|(_, p)| p.content().files == subentries_vec_map)
{
return Ok(((), Traced::generate(reuse_id)));
}
}
let mut contents = Vec::new();
for (name, (_context, subentry)) in subentries {
contents.extend(name.as_ref());

View File

@ -215,6 +215,10 @@ impl HgBlobManifest {
/// Returns the node hash held in this manifest's `computed_node_id` field.
pub fn computed_node_id(&self) -> HgNodeHash {
self.computed_node_id
}
/// Returns a shared reference to this manifest's `content` field.
///
/// `create_hg_manifest` reads `content().files` to compare a loaded parent
/// manifest against the entries of the manifest about to be created.
pub fn content(&self) -> &ManifestContent {
&self.content
}
}
#[async_trait]

View File

@ -1513,7 +1513,7 @@ mod test {
let merge_node = string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await;
let sli = SkiplistIndex::new();
@ -1644,7 +1644,7 @@ mod test {
let _merge_node = string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await;
let sli = SkiplistIndex::new();
@ -2351,7 +2351,7 @@ mod test {
let merge_node = string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await;
@ -2520,7 +2520,7 @@ mod test {
let merge_node = string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await;
@ -3039,8 +3039,8 @@ mod test {
repo,
sli,
"d7542c9db7f4c77dab4b315edd328edf1514952f",
"80274ca17119952e89d706b3d9e9f30dc831c92d",
Some("80274ca17119952e89d706b3d9e9f30dc831c92d"),
"1f6bc010883e397abeca773192f3370558ee1320",
Some("1f6bc010883e397abeca773192f3370558ee1320"),
)
.await;
}

View File

@ -198,7 +198,7 @@ pub async fn test_merge_uneven_reachability<T: ReachabilityIndex + 'static>(
let _merge_node = string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await;

View File

@ -240,7 +240,7 @@ mod test {
let nodestream = AncestorsNodeStream::new(
ctx.clone(),
&changeset_fetcher,
string_to_bonsai(fb, &repo, "416b719c25bc1e5c071ed9929072647d72ca10c3").await,
string_to_bonsai(fb, &repo, "d35b1875cdd1ed2c687e86f1604b9d7e989450cb").await,
)
.boxify();
@ -248,7 +248,7 @@ mod test {
ctx.clone(),
&repo,
vec![
string_to_bonsai(fb, &repo, "416b719c25bc1e5c071ed9929072647d72ca10c3").await,
string_to_bonsai(fb, &repo, "d35b1875cdd1ed2c687e86f1604b9d7e989450cb").await,
string_to_bonsai(fb, &repo, "264f01429683b3dd8042cb3979e8bf37007118bc").await,
string_to_bonsai(fb, &repo, "5d43888a3c972fe68c224f93d41b30e9f888df7c").await,
string_to_bonsai(fb, &repo, "fc2cef43395ff3a7b28159007f63d6529d2f41ca").await,

View File

@ -524,7 +524,7 @@ mod test {
ctx.clone(),
&changeset_fetcher,
Arc::new(SkiplistIndex::new()),
vec![string_to_bonsai(fb, &repo, "416b719c25bc1e5c071ed9929072647d72ca10c3").await],
vec![string_to_bonsai(fb, &repo, "d35b1875cdd1ed2c687e86f1604b9d7e989450cb").await],
vec![
string_to_bonsai(fb, &repo, "fc2cef43395ff3a7b28159007f63d6529d2f41ca").await,
string_to_bonsai(fb, &repo, "16839021e338500b3cf7c9b871c8a07351697d68").await,
@ -536,7 +536,7 @@ mod test {
ctx.clone(),
&repo,
vec![
string_to_bonsai(fb, &repo, "416b719c25bc1e5c071ed9929072647d72ca10c3").await,
string_to_bonsai(fb, &repo, "d35b1875cdd1ed2c687e86f1604b9d7e989450cb").await,
string_to_bonsai(fb, &repo, "264f01429683b3dd8042cb3979e8bf37007118bc").await,
string_to_bonsai(fb, &repo, "5d43888a3c972fe68c224f93d41b30e9f888df7c").await,
],
@ -557,7 +557,7 @@ mod test {
ctx.clone(),
&changeset_fetcher,
Arc::new(SkiplistIndex::new()),
vec![string_to_bonsai(fb, &repo, "416b719c25bc1e5c071ed9929072647d72ca10c3").await],
vec![string_to_bonsai(fb, &repo, "d35b1875cdd1ed2c687e86f1604b9d7e989450cb").await],
vec![string_to_bonsai(fb, &repo, "16839021e338500b3cf7c9b871c8a07351697d68").await],
)
.boxify();
@ -566,7 +566,7 @@ mod test {
ctx.clone(),
&repo,
vec![
string_to_bonsai(fb, &repo, "416b719c25bc1e5c071ed9929072647d72ca10c3").await,
string_to_bonsai(fb, &repo, "d35b1875cdd1ed2c687e86f1604b9d7e989450cb").await,
string_to_bonsai(fb, &repo, "264f01429683b3dd8042cb3979e8bf37007118bc").await,
string_to_bonsai(fb, &repo, "5d43888a3c972fe68c224f93d41b30e9f888df7c").await,
string_to_bonsai(fb, &repo, "fc2cef43395ff3a7b28159007f63d6529d2f41ca").await,

View File

@ -450,7 +450,7 @@ mod test {
string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await,
)
@ -463,7 +463,7 @@ mod test {
string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await,
string_to_bonsai(
@ -496,7 +496,7 @@ mod test {
string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await,
)
@ -509,7 +509,7 @@ mod test {
string_to_bonsai(
ctx.clone(),
&repo,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await,
string_to_bonsai(

View File

@ -379,7 +379,7 @@ mod test {
ctx.clone(),
&repo,
&[
"80274ca17119952e89d706b3d9e9f30dc831c92d",
"1f6bc010883e397abeca773192f3370558ee1320",
"4f7f3fd428bec1a48f9314414b063c706d9c1aed",
"16839021e338500b3cf7c9b871c8a07351697d68",
],
@ -416,7 +416,7 @@ mod test {
ctx.clone(),
&repo,
vec![
string_to_bonsai(fb, &repo, "80274ca17119952e89d706b3d9e9f30dc831c92d").await,
string_to_bonsai(fb, &repo, "1f6bc010883e397abeca773192f3370558ee1320").await,
string_to_bonsai(fb, &repo, "16839021e338500b3cf7c9b871c8a07351697d68").await,
],
nodestream,
@ -437,7 +437,7 @@ mod test {
ctx.clone(),
&repo,
&[
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
"4f7f3fd428bec1a48f9314414b063c706d9c1aed",
"16839021e338500b3cf7c9b871c8a07351697d68",
],
@ -473,7 +473,7 @@ mod test {
ctx.clone(),
&repo,
vec![
string_to_bonsai(fb, &repo, "416b719c25bc1e5c071ed9929072647d72ca10c3").await,
string_to_bonsai(fb, &repo, "d35b1875cdd1ed2c687e86f1604b9d7e989450cb").await,
string_to_bonsai(fb, &repo, "4f7f3fd428bec1a48f9314414b063c706d9c1aed").await,
],
nodestream,

View File

@ -242,13 +242,13 @@ async fn test_build_idmap(fb: FacebookInit) -> Result<()> {
validate_build_idmap(
ctx.clone(),
merge_even::getrepo(fb).await,
"80274ca17119952e89d706b3d9e9f30dc831c92d",
"1f6bc010883e397abeca773192f3370558ee1320",
)
.await?;
validate_build_idmap(
ctx.clone(),
merge_uneven::getrepo(fb).await,
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
)
.await?;
Ok(())
@ -421,8 +421,8 @@ async fn test_changeset_id_to_location(fb: FacebookInit) -> Result<()> {
validate_changeset_id_to_location(
ctx.clone(),
merge_uneven::getrepo(fb).await,
"416b719c25bc1e5c071ed9929072647d72ca10c3", // master, message: Merge two branches
"416b719c25bc1e5c071ed9929072647d72ca10c3", // client head == master
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb", // master, message: Merge two branches
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb", // client head == master
"fc2cef43395ff3a7b28159007f63d6529d2f41ca", // message: Add 4
Some(Location::new("264f01429683b3dd8042cb3979e8bf37007118bc", 2)), // message: add 5
)

View File

@ -819,7 +819,7 @@ pub mod merge_even {
"author"=> "Simon Farnsworth <simonfar@fb.com>",
"author_date"=> "1506435224 25200",
"message"=> "Merge",
"expected_hg_changeset"=> "80274ca17119952e89d706b3d9e9f30dc831c92d",
"expected_hg_changeset"=> "1f6bc010883e397abeca773192f3370558ee1320",
"changed_files"=> "branch",
};
create_bonsai_changeset_from_test_data(fb, blobrepo.clone(), files, commit_metadata).await;
@ -827,7 +827,7 @@ pub mod merge_even {
set_bookmark(
fb,
blobrepo.clone(),
"80274ca17119952e89d706b3d9e9f30dc831c92d",
"1f6bc010883e397abeca773192f3370558ee1320",
BookmarkName::new("master").unwrap(),
)
.await;
@ -1112,14 +1112,14 @@ pub mod merge_uneven {
"author"=> "Simon Farnsworth <simonfar@fb.com>",
"author_date"=> "1506435672 25200",
"message"=> "Merge two branches",
"expected_hg_changeset"=> "416b719c25bc1e5c071ed9929072647d72ca10c3",
"expected_hg_changeset"=> "d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
};
create_bonsai_changeset_from_test_data(fb, blobrepo.clone(), files, commit_metadata).await;
set_bookmark(
fb,
blobrepo.clone(),
"416b719c25bc1e5c071ed9929072647d72ca10c3",
"d35b1875cdd1ed2c687e86f1604b9d7e989450cb",
BookmarkName::new("master").unwrap(),
)
.await;

View File

@ -277,12 +277,12 @@ Do the invisible merge by gradually merging TOMERGES into master
Current: 51c49b0bd6828234ce57148769ca56f254e463bd
To merge: 15f0ba01db155d0431552defe999ca51e5b8a0a632bc323cc2faeeddc4064cc1
Merged as (bonsai): e0d0f35215c77449e9e63807cbec7f09368ebd0591f11adb53a133a53add4a7a
Merged as (hg): a2da597439baf0918aaf1d6153ce85a7066bee9d
Merged as (hg): f2ac779eb5ef342aab788bcb278e57e53b2bc83e
file count is: 4
Current: 537d8dc759cb6c028c6907e51bf01217f6e748bf
Current: 0eb9c5feca13f5b7c5daf2c34b659c3846569fad
To merge: 0b114e8a3d0d62a31ff8f99b8894603cf37cdb6edc070d744a7a457bd360fc0a
Merged as (bonsai): 9e0bfdd3a6cd0a41697e67f00baf3d060e0e7660f7e2b0e2be34c3f5c5691984
Merged as (hg): 00ee43db03b2bd0ed0e9ad806f23f042dd3ddff8
Merged as (hg): 005686fbc230dc0be4e1cc2fabf46d87bbb19001
file count is: 6
$ REPONAME=fbs-mon hgmn pull -q |& grep -v 'devel-warn'
[1]
@ -351,7 +351,7 @@ Perform ovrsource pushrebase, make sure it is push-redirected into Fbsource
$ cd "$TESTTMP"/fbs-hg-cnt
$ REPONAME=fbs-mon hgmn pull -q
$ log -r master_bookmark
o pushredirected_3 [public;rev=14;a272f72e81b3] default/master_bookmark
o pushredirected_3 [public;rev=14;223e2529a7b8] default/master_bookmark
~
-- ensure that ovrsource root path ends up in megarepo's arvr-legacy