mononoke: introduce generate_filenodes method

Summary:
At the moment filenodes [1] are generated as soon as the hg changeset is
generated (see the get_manifest_from_bonsai() function). This is not great for
a few reasons:

1) It makes the code more complicated - we have to pass the IncompleteFilenodes
structure all the way down to the derive_hg_manifest() function, we have to make
pushrebase aware of the filenodes, and we have to pass a "draft" flag to
CreateChangeset.

2) It's actually incorrect - we don't want to generate filenodes for draft
commits at all [2]. This works right now only because we don't generate hg
changesets for commit cloud commits, but it's just a matter of time before we
start doing that [3].

The goal of this stack is to separate filenode generation from hg changeset
generation, and to move filenode generation onto derived data. Filenodes will
then be generated while serving getbundle or pushrebase responses.

This diff implements the core functionality - generating the list of filenodes
for a bonsai changeset. It does so by finding all changed files/manifests for a
commit, and then fetching the respective envelopes to get p1/p2/copy info.

Note that it's possible to avoid fetching file envelopes, but then we'd have to
replicate the complicated logic of finding the correct filenode parents (see
https://fburl.com/25l6h1nm), and I don't think the perf benefit outweighs the
added code complexity.
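
For illustration only, a hypothetical caller of the new method might look like
the sketch below (not part of this diff; it assumes the same imports as the new
file, and the name print_filenodes is made up):

async fn print_filenodes(
    ctx: CoreContext,
    repo: BlobRepo,
    cs_id: ChangesetId,
) -> Result<(), Error> {
    // Derive every filenode (files and manifests, including the root manifest)
    // introduced by this bonsai changeset.
    let filenodes = generate_all_filenodes(ctx, repo, cs_id).await?;
    for info in filenodes {
        // Each FilenodeInfo carries path, filenode, p1/p2, copyfrom and linknode.
        println!("{:?}: {:?}", info.path, info.filenode);
    }
    Ok(())
}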

Reviewed By: farnz

Differential Revision: D19501515

fbshipit-source-id: 8bf9b9fcbf832d99443a146257cd93cdf7e34130
Stanislau Hlebik 2020-01-22 14:44:16 -08:00 committed by Facebook Github Bot
parent 4b3abaf708
commit b3e4fe38e3
3 changed files with 358 additions and 9 deletions


@@ -0,0 +1,316 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License found in the LICENSE file in the root
* directory of this source tree.
*/
#![deny(warnings)]
use anyhow::{format_err, Error};
use blobrepo::BlobRepo;
use context::CoreContext;
use filenodes::FilenodeInfo;
use futures::Future;
use futures_ext::FutureExt;
use futures_preview::compat::{Future01CompatExt, Stream01CompatExt};
use futures_util::{future::try_join_all, try_join, TryStreamExt};
use manifest::{find_intersection_of_diffs_and_parents, Entry};
use mercurial_types::{
blobs::{fetch_file_envelope, File},
fetch_manifest_envelope, HgChangesetId, HgFileEnvelope, HgFileNodeId, HgManifestEnvelope,
HgManifestId,
};
use mononoke_types::{ChangesetId, MPath, RepoPath};
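
/// Generate all filenodes (one per changed file and manifest, including the
/// root manifest) introduced by the given bonsai changeset, using the
/// corresponding hg changeset as the linknode.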
pub async fn generate_all_filenodes(
ctx: CoreContext,
repo: BlobRepo,
cs_id: ChangesetId,
) -> Result<Vec<FilenodeInfo>, Error> {
let parents = repo
.get_changeset_parents_by_bonsai(ctx.clone(), cs_id)
.compat()
.await?;
// Mercurial commits can have at most 2 parents, while bonsai commits can have more.
// In that case p3, p4, ... are ignored (they are called step-parents)
let cs_parents: Vec<_> = parents.into_iter().take(2).collect();
let root_mf = fetch_root_manifest_id(ctx.clone(), cs_id, &repo);
let parents = try_join_all(
cs_parents
.clone()
.into_iter()
.map(|p| fetch_root_manifest_id(ctx.clone(), p, &repo)),
);
let linknode = repo
.get_hg_from_bonsai_changeset(ctx.clone(), cs_id)
.compat();
let (root_mf, parents, linknode) = try_join!(root_mf, parents, linknode)?;
let blobstore = repo.get_blobstore().boxed();
find_intersection_of_diffs_and_parents(
ctx.clone(),
repo.get_blobstore(),
root_mf,
parents.clone(),
)
.compat()
.try_filter_map(|(path, entry, parent_entries)| {
async move {
// A file entry has a file type and a filenode id. If the file type differs but
// the filenode id is the same, we don't want to create a new filenode, so this
// filter removes all entries where at least one parent has the same filenode id.
if let Entry::Leaf((_, hg_filenode_id)) = entry {
for parent_entry in parent_entries {
if let Entry::Leaf((_, parent_filenode_id)) = parent_entry {
if parent_filenode_id == hg_filenode_id {
return Ok(None);
}
}
}
}
Ok(Some((path, entry)))
}
})
.map_ok(move |(path, entry)| match entry {
Entry::Tree(hg_mf_id) => fetch_manifest_envelope(ctx.clone(), &blobstore, hg_mf_id)
.map(move |envelope| create_manifest_filenode(path, envelope, linknode))
.left_future()
.compat(),
Entry::Leaf((_, hg_filenode_id)) => {
fetch_file_envelope(ctx.clone(), &blobstore, hg_filenode_id)
.and_then(move |envelope| create_file_filenode(path, envelope, linknode))
.right_future()
.compat()
}
})
.try_buffer_unordered(100)
.try_collect()
.await
}
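
/// Build a FilenodeInfo for the root or a directory manifest entry; manifests
/// never carry copy-from information.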
fn create_manifest_filenode(
path: Option<MPath>,
envelope: HgManifestEnvelope,
linknode: HgChangesetId,
) -> FilenodeInfo {
let path = match path {
Some(path) => RepoPath::DirectoryPath(path),
None => RepoPath::RootPath,
};
let filenode = HgFileNodeId::new(envelope.node_id());
let (p1, p2) = envelope.parents();
let p1 = p1.map(HgFileNodeId::new);
let p2 = p2.map(HgFileNodeId::new);
FilenodeInfo {
path,
filenode,
p1,
p2,
copyfrom: None,
linknode,
}
}
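
/// Build a FilenodeInfo for a file entry, extracting p1/p2 and any copy-from
/// metadata from its file envelope.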
fn create_file_filenode(
path: Option<MPath>,
envelope: HgFileEnvelope,
linknode: HgChangesetId,
) -> Result<FilenodeInfo, Error> {
let path = match path {
Some(path) => RepoPath::FilePath(path),
None => {
return Err(format_err!("unexpected empty file path"));
}
};
let filenode = envelope.node_id();
let (p1, p2) = envelope.parents();
let copyfrom = File::extract_copied_from(envelope.metadata())?
.map(|(path, node)| (RepoPath::FilePath(path), node));
Ok(FilenodeInfo {
path,
filenode,
p1,
p2,
copyfrom,
linknode,
})
}
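
/// Fetch the root hg manifest id of a bonsai changeset via its derived hg changeset.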
async fn fetch_root_manifest_id(
ctx: CoreContext,
cs_id: ChangesetId,
repo: &BlobRepo,
) -> Result<HgManifestId, Error> {
let hg_cs_id = repo
.get_hg_from_bonsai_changeset(ctx.clone(), cs_id)
.compat()
.await?;
let hg_cs = repo
.get_changeset_by_changesetid(ctx.clone(), hg_cs_id)
.compat()
.await?;
Ok(hg_cs.manifestid())
}
#[cfg(test)]
mod tests {
use super::*;
use fbinit::FacebookInit;
use mononoke_types::FileType;
use tests_utils::CreateCommitContext;
async fn verify_filenodes(
ctx: &CoreContext,
repo: &BlobRepo,
cs_id: ChangesetId,
expected_paths: Vec<RepoPath>,
) -> Result<(), Error> {
let filenodes = generate_all_filenodes(ctx.clone(), repo.clone(), cs_id).await?;
assert_eq!(filenodes.len(), expected_paths.len());
for path in expected_paths {
assert!(filenodes.iter().any(|filenode| filenode.path == path));
}
let linknode = repo
.get_hg_from_bonsai_changeset(ctx.clone(), cs_id)
.compat()
.await?;
for filenode in filenodes {
assert_eq!(filenode.linknode, linknode);
}
Ok(())
}
async fn test_generate_filenodes_simple(fb: FacebookInit) -> Result<(), Error> {
let ctx = CoreContext::test_mock(fb);
let repo = blobrepo_factory::new_memblob_empty(None)?;
let filename = "path";
let commit = CreateCommitContext::new_root(&ctx, &repo)
.add_file(filename, "content")
.commit()
.await?;
// Two filenodes - one for root manifest, another for a file
verify_filenodes(
&ctx,
&repo,
commit,
vec![RepoPath::RootPath, RepoPath::file(filename)?],
)
.await?;
Ok(())
}
#[fbinit::test]
fn generate_filenodes_simple(fb: FacebookInit) -> Result<(), Error> {
let mut runtime = tokio_compat::runtime::Runtime::new()?;
runtime.block_on_std(test_generate_filenodes_simple(fb))
}
async fn test_generate_filenodes_merge(fb: FacebookInit) -> Result<(), Error> {
let ctx = CoreContext::test_mock(fb);
let repo = blobrepo_factory::new_memblob_empty(None)?;
let first_p1 = CreateCommitContext::new_root(&ctx, &repo)
.add_file("path1", "content")
.commit()
.await?;
let first_p2 = CreateCommitContext::new_root(&ctx, &repo)
.add_file("path2", "content")
.commit()
.await?;
let merge = CreateCommitContext::new(&ctx, &repo, vec![first_p1, first_p2])
.commit()
.await?;
// Only root filenode was added - other filenodes were reused from parents
verify_filenodes(&ctx, &repo, merge, vec![RepoPath::RootPath]).await?;
Ok(())
}
#[fbinit::test]
fn generate_filenodes_merge(fb: FacebookInit) -> Result<(), Error> {
let mut runtime = tokio_compat::runtime::Runtime::new()?;
runtime.block_on_std(test_generate_filenodes_merge(fb))
}
async fn test_generate_type_change(fb: FacebookInit) -> Result<(), Error> {
let ctx = CoreContext::test_mock(fb);
let repo = blobrepo_factory::new_memblob_empty(None)?;
let parent = CreateCommitContext::new_root(&ctx, &repo)
.add_file("path", "content")
.commit()
.await?;
let child = CreateCommitContext::new(&ctx, &repo, vec![parent])
.add_file_with_type("path", "content", FileType::Executable)
.commit()
.await?;
// Only the root filenode should change - a change of file type alone doesn't change the filenode
verify_filenodes(&ctx, &repo, child, vec![RepoPath::RootPath]).await?;
Ok(())
}
#[fbinit::test]
fn generate_filenodes_type_change(fb: FacebookInit) -> Result<(), Error> {
let mut runtime = tokio_compat::runtime::Runtime::new()?;
runtime.block_on_std(test_generate_type_change(fb))
}
async fn test_many_parents(fb: FacebookInit) -> Result<(), Error> {
let ctx = CoreContext::test_mock(fb);
let repo = blobrepo_factory::new_memblob_empty(None)?;
let p1 = CreateCommitContext::new_root(&ctx, &repo)
.add_file("path1", "content")
.commit()
.await?;
let p2 = CreateCommitContext::new_root(&ctx, &repo)
.add_file("path2", "content")
.commit()
.await?;
let p3 = CreateCommitContext::new_root(&ctx, &repo)
.add_file("path3", "content")
.commit()
.await?;
let merge = CreateCommitContext::new(&ctx, &repo, vec![p1, p2, p3])
.commit()
.await?;
// Root filenode was changed, and all files from p3 were added (because parents beyond
// p1 and p2 are ignored when generating filenodes and hg changesets)
verify_filenodes(
&ctx,
&repo,
merge,
vec![RepoPath::RootPath, RepoPath::file("path3")?],
)
.await?;
Ok(())
}
#[fbinit::test]
fn many_parents(fb: FacebookInit) -> Result<(), Error> {
let mut runtime = tokio_compat::runtime::Runtime::new()?;
runtime.block_on_std(test_many_parents(fb))
}
}


@@ -11,7 +11,10 @@
 pub use crate::bonsai::{bonsai_diff, BonsaiDiffFileChange};
 pub use crate::derive::{derive_manifest, derive_manifest_with_io_sender, LeafInfo, TreeInfo};
 pub use crate::implicit_deletes::get_implicit_deletes;
-pub use crate::ops::{find_intersection_of_diffs, Diff, ManifestOps, PathOrPrefix};
+pub use crate::ops::{
+    find_intersection_of_diffs, find_intersection_of_diffs_and_parents, Diff, ManifestOps,
+    PathOrPrefix,
+};
 pub use crate::types::{Entry, Manifest, PathTree, StoreLoadable, Traced};

 mod bonsai;


@@ -431,6 +431,31 @@ pub fn find_intersection_of_diffs<TreeId, LeafId, Store>(
     mf_id: TreeId,
     diff_against: Vec<TreeId>,
 ) -> impl Stream<Item = (Option<MPath>, Entry<TreeId, LeafId>), Error = Error>
+where
+    Store: Sync + Send + Clone + 'static,
+    TreeId: StoreLoadable<Store> + Clone + Send + Eq + 'static,
+    <TreeId as StoreLoadable<Store>>::Value: Manifest<TreeId = TreeId, LeafId = LeafId> + Send,
+    LeafId: Clone + Send + Eq + 'static,
+{
+    find_intersection_of_diffs_and_parents(ctx, store, mf_id, diff_against)
+        .map(|(path, entry, _)| (path, entry))
+}
+
+/// Like `find_intersection_of_diffs` but for each returned entry it also returns diff_against
+/// entries with the same path.
+pub fn find_intersection_of_diffs_and_parents<TreeId, LeafId, Store>(
+    ctx: CoreContext,
+    store: Store,
+    mf_id: TreeId,
+    diff_against: Vec<TreeId>,
+) -> impl Stream<
+    Item = (
+        Option<MPath>,
+        Entry<TreeId, LeafId>,
+        Vec<Entry<TreeId, LeafId>>,
+    ),
+    Error = Error,
+>
 where
     Store: Sync + Send + Clone + 'static,
     TreeId: StoreLoadable<Store> + Clone + Send + Eq + 'static,
@@ -441,9 +466,9 @@ where
         Some(parent) => (*parent)
             .diff(ctx.clone(), store.clone(), mf_id)
             .filter_map(|diff_entry| match diff_entry {
-                Diff::Added(path, entry) => Some((path, entry)),
+                Diff::Added(path, entry) => Some((path, entry, vec![])),
                 Diff::Removed(..) => None,
-                Diff::Changed(path, _, entry) => Some((path, entry)),
+                Diff::Changed(path, parent_entry, entry) => Some((path, entry, vec![parent_entry])),
             })
             .collect()
             .and_then({
@@ -452,7 +477,7 @@ where
                 let paths: Vec<_> = new_entries
                     .clone()
                     .into_iter()
-                    .map(|(path, _)| path)
+                    .map(|(path, _, _)| path)
                     .collect();
                 let futs = diff_against.into_iter().skip(1).map(move |p| {
@@ -463,17 +488,21 @@ where
                 future::join_all(futs).map(move |entries_in_parents| {
                     let mut res = vec![];
-                    for (path, unode) in new_entries {
+                    for (path, unode, mut parent_entries) in new_entries {
                         let mut new_entry = true;
                         for p in &entries_in_parents {
-                            if p.get(&path) == Some(&unode) {
-                                new_entry = false;
-                                break;
+                            if let Some(parent_entry) = p.get(&path) {
+                                if parent_entry == &unode {
+                                    new_entry = false;
+                                    break;
+                                } else {
+                                    parent_entries.push(parent_entry.clone());
+                                }
                             }
                         }
                         if new_entry {
-                            res.push((path, unode));
+                            res.push((path, unode, parent_entries));
                         }
                     }
@@ -486,6 +515,7 @@ where
             .left_stream(),
         None => mf_id
             .list_all_entries(ctx.clone(), store.clone())
+            .map(|(path, entry)| (path, entry, vec![]))
             .right_stream(),
     }
 }
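
To make the new contract concrete, here is a hedged, self-contained analogue of
find_intersection_of_diffs_and_parents over plain maps (illustrative only - the
real implementation above is a generic stream over manifests, and the name
diffs_and_parents is made up): an entry is emitted only if no parent has an
equal entry at the same path, and the differing parent entries at that path are
returned alongside it.

use std::collections::HashMap;
use std::hash::Hash;

// Toy model: `new` holds the child's entries, `parents` the diff_against entries.
fn diffs_and_parents<K: Eq + Hash + Clone, V: Eq + Clone>(
    new: &HashMap<K, V>,
    parents: &[HashMap<K, V>],
) -> Vec<(K, V, Vec<V>)> {
    let mut res = vec![];
    for (path, entry) in new {
        let mut parent_entries = vec![];
        let mut is_new = true;
        for p in parents {
            if let Some(parent_entry) = p.get(path) {
                if parent_entry == entry {
                    // Some parent has an identical entry: nothing new at this path.
                    is_new = false;
                    break;
                } else {
                    // A parent has a different entry at the same path: report it.
                    parent_entries.push(parent_entry.clone());
                }
            }
        }
        if is_new {
            res.push((path.clone(), entry.clone(), parent_entries));
        }
    }
    res
}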