mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 00:45:18 +03:00
mononoke: introduce generate_filenodes method
Summary: At the moment filenodes [1] are generated as soon as hg changeset is generated (see get_manifest_from_bonsai() from bonsai function). This is not great for a few reasons: 1) It makes code more complicated - we have to pass IncomepleteFilenodes structure all the way down to derive_hg_manifest() function, we have to make pushrebase aware of the filenodes and we have to pass "draft" flag to CreateChangeset. 2) It's actually incorrect - we don't want to generate filenodes for draft commits at all [2]. It works right now because we don't generate hg changesets for commit cloud commits, but it's just a matter of time before we start doing it [3]. The goal of this stack is to separate filenode generation from hg changeset generation, and move filenode generation onto derived data. Filenodes will be generated during generating getbundle or pushrebase responses. This diff implements the core functionality - it generates a list of filenodes for a bonsai changeset. It does so by finding all changed files/manifests for a commit, and then fetching respective envelopes to get p1/p2/copy info. Note that it's possible to avoid fetching file envelopes, however then we'd have to replicate a complicated logic of finding correct filenode parents (see https://fburl.com/25l6h1nm), and I don't think perf benefit outweighs introduced code complexity. Reviewed By: farnz Differential Revision: D19501515 fbshipit-source-id: 8bf9b9fcbf832d99443a146257cd93cdf7e34130
This commit is contained in:
parent
4b3abaf708
commit
b3e4fe38e3
316
mononoke/derived_data/filenodes/lib.rs
Normal file
316
mononoke/derived_data/filenodes/lib.rs
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||||
|
*
|
||||||
|
* This software may be used and distributed according to the terms of the
|
||||||
|
* GNU General Public License found in the LICENSE file in the root
|
||||||
|
* directory of this source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#![deny(warnings)]
|
||||||
|
|
||||||
|
use anyhow::{format_err, Error};
|
||||||
|
use blobrepo::BlobRepo;
|
||||||
|
use context::CoreContext;
|
||||||
|
use filenodes::FilenodeInfo;
|
||||||
|
use futures::Future;
|
||||||
|
use futures_ext::FutureExt;
|
||||||
|
use futures_preview::compat::{Future01CompatExt, Stream01CompatExt};
|
||||||
|
use futures_util::{future::try_join_all, try_join, TryStreamExt};
|
||||||
|
use manifest::{find_intersection_of_diffs_and_parents, Entry};
|
||||||
|
use mercurial_types::{
|
||||||
|
blobs::{fetch_file_envelope, File},
|
||||||
|
fetch_manifest_envelope, HgChangesetId, HgFileEnvelope, HgFileNodeId, HgManifestEnvelope,
|
||||||
|
HgManifestId,
|
||||||
|
};
|
||||||
|
use mononoke_types::{ChangesetId, MPath, RepoPath};
|
||||||
|
|
||||||
|
pub async fn generate_all_filenodes(
|
||||||
|
ctx: CoreContext,
|
||||||
|
repo: BlobRepo,
|
||||||
|
cs_id: ChangesetId,
|
||||||
|
) -> Result<Vec<FilenodeInfo>, Error> {
|
||||||
|
let parents = repo
|
||||||
|
.get_changeset_parents_by_bonsai(ctx.clone(), cs_id)
|
||||||
|
.compat()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Mercurial commits can have only 2 parents, however bonsai commits can have more
|
||||||
|
// In that case p3, p4, ... are ignored (they are called step parents)
|
||||||
|
let cs_parents: Vec<_> = parents.into_iter().take(2).collect();
|
||||||
|
let root_mf = fetch_root_manifest_id(ctx.clone(), cs_id, &repo);
|
||||||
|
let parents = try_join_all(
|
||||||
|
cs_parents
|
||||||
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.map(|p| fetch_root_manifest_id(ctx.clone(), p, &repo)),
|
||||||
|
);
|
||||||
|
|
||||||
|
let linknode = repo
|
||||||
|
.get_hg_from_bonsai_changeset(ctx.clone(), cs_id)
|
||||||
|
.compat();
|
||||||
|
|
||||||
|
let (root_mf, parents, linknode) = try_join!(root_mf, parents, linknode)?;
|
||||||
|
let blobstore = repo.get_blobstore().boxed();
|
||||||
|
find_intersection_of_diffs_and_parents(
|
||||||
|
ctx.clone(),
|
||||||
|
repo.get_blobstore(),
|
||||||
|
root_mf,
|
||||||
|
parents.clone(),
|
||||||
|
)
|
||||||
|
.compat()
|
||||||
|
.try_filter_map(|(path, entry, parent_entries)| {
|
||||||
|
async move {
|
||||||
|
// file entry has file type and file node id. If file type is different but filenode is
|
||||||
|
// the same we don't want to create a new filenode, and this filter removes
|
||||||
|
// all entries where at least one parent has the same filenode id.
|
||||||
|
if let Entry::Leaf((_, hg_filenode_id)) = entry {
|
||||||
|
for parent_entry in parent_entries {
|
||||||
|
if let Entry::Leaf((_, parent_filenode_id)) = parent_entry {
|
||||||
|
if parent_filenode_id == hg_filenode_id {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Some((path, entry)))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map_ok(move |(path, entry)| match entry {
|
||||||
|
Entry::Tree(hg_mf_id) => fetch_manifest_envelope(ctx.clone(), &blobstore, hg_mf_id)
|
||||||
|
.map(move |envelope| create_manifest_filenode(path, envelope, linknode))
|
||||||
|
.left_future()
|
||||||
|
.compat(),
|
||||||
|
Entry::Leaf((_, hg_filenode_id)) => {
|
||||||
|
fetch_file_envelope(ctx.clone(), &blobstore, hg_filenode_id)
|
||||||
|
.and_then(move |envelope| create_file_filenode(path, envelope, linknode))
|
||||||
|
.right_future()
|
||||||
|
.compat()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.try_buffer_unordered(100)
|
||||||
|
.try_collect()
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_manifest_filenode(
|
||||||
|
path: Option<MPath>,
|
||||||
|
envelope: HgManifestEnvelope,
|
||||||
|
linknode: HgChangesetId,
|
||||||
|
) -> FilenodeInfo {
|
||||||
|
let path = match path {
|
||||||
|
Some(path) => RepoPath::DirectoryPath(path),
|
||||||
|
None => RepoPath::RootPath,
|
||||||
|
};
|
||||||
|
let filenode = HgFileNodeId::new(envelope.node_id());
|
||||||
|
let (p1, p2) = envelope.parents();
|
||||||
|
let p1 = p1.map(HgFileNodeId::new);
|
||||||
|
let p2 = p2.map(HgFileNodeId::new);
|
||||||
|
|
||||||
|
FilenodeInfo {
|
||||||
|
path,
|
||||||
|
filenode,
|
||||||
|
p1,
|
||||||
|
p2,
|
||||||
|
copyfrom: None,
|
||||||
|
linknode,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_file_filenode(
|
||||||
|
path: Option<MPath>,
|
||||||
|
envelope: HgFileEnvelope,
|
||||||
|
linknode: HgChangesetId,
|
||||||
|
) -> Result<FilenodeInfo, Error> {
|
||||||
|
let path = match path {
|
||||||
|
Some(path) => RepoPath::FilePath(path),
|
||||||
|
None => {
|
||||||
|
return Err(format_err!("unexpected empty file path"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let filenode = envelope.node_id();
|
||||||
|
let (p1, p2) = envelope.parents();
|
||||||
|
let copyfrom = File::extract_copied_from(envelope.metadata())?
|
||||||
|
.map(|(path, node)| (RepoPath::FilePath(path), node));
|
||||||
|
|
||||||
|
Ok(FilenodeInfo {
|
||||||
|
path,
|
||||||
|
filenode,
|
||||||
|
p1,
|
||||||
|
p2,
|
||||||
|
copyfrom,
|
||||||
|
linknode,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_root_manifest_id(
|
||||||
|
ctx: CoreContext,
|
||||||
|
cs_id: ChangesetId,
|
||||||
|
repo: &BlobRepo,
|
||||||
|
) -> Result<HgManifestId, Error> {
|
||||||
|
let hg_cs_id = repo
|
||||||
|
.get_hg_from_bonsai_changeset(ctx.clone(), cs_id)
|
||||||
|
.compat()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let hg_cs = repo
|
||||||
|
.get_changeset_by_changesetid(ctx.clone(), hg_cs_id)
|
||||||
|
.compat()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(hg_cs.manifestid())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use fbinit::FacebookInit;
    use mononoke_types::FileType;
    use tests_utils::CreateCommitContext;

    // Helper shared by all tests: generates filenodes for `cs_id` and checks that
    //  1) exactly `expected_paths` filenodes were produced (same count, and every
    //     expected path appears among the results), and
    //  2) every filenode's linknode is the hg changeset derived from `cs_id`.
    async fn verify_filenodes(
        ctx: &CoreContext,
        repo: &BlobRepo,
        cs_id: ChangesetId,
        expected_paths: Vec<RepoPath>,
    ) -> Result<(), Error> {
        let filenodes = generate_all_filenodes(ctx.clone(), repo.clone(), cs_id).await?;

        assert_eq!(filenodes.len(), expected_paths.len());
        for path in expected_paths {
            // Each expected path must show up in the generated filenodes.
            assert!(filenodes
                .iter()
                .find(|filenode| filenode.path == path)
                .is_some());
        }

        let linknode = repo
            .get_hg_from_bonsai_changeset(ctx.clone(), cs_id)
            .compat()
            .await?;

        for filenode in filenodes {
            assert_eq!(filenode.linknode, linknode);
        }
        Ok(())
    }

    // Single-file root commit: expect a filenode for the root manifest plus one
    // for the added file.
    async fn test_generate_filenodes_simple(fb: FacebookInit) -> Result<(), Error> {
        let ctx = CoreContext::test_mock(fb);
        let repo = blobrepo_factory::new_memblob_empty(None)?;
        let filename = "path";
        let commit = CreateCommitContext::new_root(&ctx, &repo)
            .add_file(filename, "content")
            .commit()
            .await?;

        // Two filenodes - one for root manifest, another for a file
        verify_filenodes(
            &ctx,
            &repo,
            commit,
            vec![RepoPath::RootPath, RepoPath::file(filename)?],
        )
        .await?;

        Ok(())
    }

    #[fbinit::test]
    fn generate_filenodes_simple(fb: FacebookInit) -> Result<(), Error> {
        // Drive the async test body on a tokio-compat runtime.
        let mut runtime = tokio_compat::runtime::Runtime::new()?;
        runtime.block_on_std(test_generate_filenodes_simple(fb))
    }

    // Two-parent merge where no file content changes: file filenodes are reused
    // from the parents, so only the root manifest filenode is new.
    async fn test_generate_filenodes_merge(fb: FacebookInit) -> Result<(), Error> {
        let ctx = CoreContext::test_mock(fb);
        let repo = blobrepo_factory::new_memblob_empty(None)?;
        let first_p1 = CreateCommitContext::new_root(&ctx, &repo)
            .add_file("path1", "content")
            .commit()
            .await?;

        let first_p2 = CreateCommitContext::new_root(&ctx, &repo)
            .add_file("path2", "content")
            .commit()
            .await?;

        let merge = CreateCommitContext::new(&ctx, &repo, vec![first_p1, first_p2])
            .commit()
            .await?;

        // Only root filenode was added - other filenodes were reused from parents
        verify_filenodes(&ctx, &repo, merge, vec![RepoPath::RootPath]).await?;

        Ok(())
    }

    #[fbinit::test]
    fn generate_filenodes_merge(fb: FacebookInit) -> Result<(), Error> {
        let mut runtime = tokio_compat::runtime::Runtime::new()?;
        runtime.block_on_std(test_generate_filenodes_merge(fb))
    }

    // Changing only the file type (regular -> executable) keeps the filenode id
    // the same, so only the root manifest filenode should be generated.
    async fn test_generate_type_change(fb: FacebookInit) -> Result<(), Error> {
        let ctx = CoreContext::test_mock(fb);
        let repo = blobrepo_factory::new_memblob_empty(None)?;
        let parent = CreateCommitContext::new_root(&ctx, &repo)
            .add_file("path", "content")
            .commit()
            .await?;

        let child = CreateCommitContext::new(&ctx, &repo, vec![parent])
            .add_file_with_type("path", "content", FileType::Executable)
            .commit()
            .await?;

        // Only root filenode should be changed - change of file type doesn't change filenode
        verify_filenodes(&ctx, &repo, child, vec![RepoPath::RootPath]).await?;

        Ok(())
    }

    #[fbinit::test]
    fn generate_filenodes_type_change(fb: FacebookInit) -> Result<(), Error> {
        let mut runtime = tokio_compat::runtime::Runtime::new()?;
        runtime.block_on_std(test_generate_type_change(fb))
    }

    // Merge with three parents: p3 is a "step parent" and is ignored when
    // deriving hg data, so all of its files appear as new filenodes in the
    // merge, alongside the root manifest filenode.
    async fn test_many_parents(fb: FacebookInit) -> Result<(), Error> {
        let ctx = CoreContext::test_mock(fb);
        let repo = blobrepo_factory::new_memblob_empty(None)?;
        let p1 = CreateCommitContext::new_root(&ctx, &repo)
            .add_file("path1", "content")
            .commit()
            .await?;
        let p2 = CreateCommitContext::new_root(&ctx, &repo)
            .add_file("path2", "content")
            .commit()
            .await?;
        let p3 = CreateCommitContext::new_root(&ctx, &repo)
            .add_file("path3", "content")
            .commit()
            .await?;

        let merge = CreateCommitContext::new(&ctx, &repo, vec![p1, p2, p3])
            .commit()
            .await?;

        // Root filenode was changed, and all files from p3 were added (because parents beyond
        // p1 and p2 are ignored when generating filenodes and hg changesets)
        verify_filenodes(
            &ctx,
            &repo,
            merge,
            vec![RepoPath::RootPath, RepoPath::file("path3")?],
        )
        .await?;

        Ok(())
    }

    #[fbinit::test]
    fn many_parents(fb: FacebookInit) -> Result<(), Error> {
        let mut runtime = tokio_compat::runtime::Runtime::new()?;
        runtime.block_on_std(test_many_parents(fb))
    }
}
|
@ -11,7 +11,10 @@
|
|||||||
pub use crate::bonsai::{bonsai_diff, BonsaiDiffFileChange};
|
pub use crate::bonsai::{bonsai_diff, BonsaiDiffFileChange};
|
||||||
pub use crate::derive::{derive_manifest, derive_manifest_with_io_sender, LeafInfo, TreeInfo};
|
pub use crate::derive::{derive_manifest, derive_manifest_with_io_sender, LeafInfo, TreeInfo};
|
||||||
pub use crate::implicit_deletes::get_implicit_deletes;
|
pub use crate::implicit_deletes::get_implicit_deletes;
|
||||||
pub use crate::ops::{find_intersection_of_diffs, Diff, ManifestOps, PathOrPrefix};
|
pub use crate::ops::{
|
||||||
|
find_intersection_of_diffs, find_intersection_of_diffs_and_parents, Diff, ManifestOps,
|
||||||
|
PathOrPrefix,
|
||||||
|
};
|
||||||
pub use crate::types::{Entry, Manifest, PathTree, StoreLoadable, Traced};
|
pub use crate::types::{Entry, Manifest, PathTree, StoreLoadable, Traced};
|
||||||
|
|
||||||
mod bonsai;
|
mod bonsai;
|
||||||
|
@ -431,6 +431,31 @@ pub fn find_intersection_of_diffs<TreeId, LeafId, Store>(
|
|||||||
mf_id: TreeId,
|
mf_id: TreeId,
|
||||||
diff_against: Vec<TreeId>,
|
diff_against: Vec<TreeId>,
|
||||||
) -> impl Stream<Item = (Option<MPath>, Entry<TreeId, LeafId>), Error = Error>
|
) -> impl Stream<Item = (Option<MPath>, Entry<TreeId, LeafId>), Error = Error>
|
||||||
|
where
|
||||||
|
Store: Sync + Send + Clone + 'static,
|
||||||
|
TreeId: StoreLoadable<Store> + Clone + Send + Eq + 'static,
|
||||||
|
<TreeId as StoreLoadable<Store>>::Value: Manifest<TreeId = TreeId, LeafId = LeafId> + Send,
|
||||||
|
LeafId: Clone + Send + Eq + 'static,
|
||||||
|
{
|
||||||
|
find_intersection_of_diffs_and_parents(ctx, store, mf_id, diff_against)
|
||||||
|
.map(|(path, entry, _)| (path, entry))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like `find_intersection_of_diffs` but for each returned entry it also returns diff_against
|
||||||
|
/// entries with the same path.
|
||||||
|
pub fn find_intersection_of_diffs_and_parents<TreeId, LeafId, Store>(
|
||||||
|
ctx: CoreContext,
|
||||||
|
store: Store,
|
||||||
|
mf_id: TreeId,
|
||||||
|
diff_against: Vec<TreeId>,
|
||||||
|
) -> impl Stream<
|
||||||
|
Item = (
|
||||||
|
Option<MPath>,
|
||||||
|
Entry<TreeId, LeafId>,
|
||||||
|
Vec<Entry<TreeId, LeafId>>,
|
||||||
|
),
|
||||||
|
Error = Error,
|
||||||
|
>
|
||||||
where
|
where
|
||||||
Store: Sync + Send + Clone + 'static,
|
Store: Sync + Send + Clone + 'static,
|
||||||
TreeId: StoreLoadable<Store> + Clone + Send + Eq + 'static,
|
TreeId: StoreLoadable<Store> + Clone + Send + Eq + 'static,
|
||||||
@ -441,9 +466,9 @@ where
|
|||||||
Some(parent) => (*parent)
|
Some(parent) => (*parent)
|
||||||
.diff(ctx.clone(), store.clone(), mf_id)
|
.diff(ctx.clone(), store.clone(), mf_id)
|
||||||
.filter_map(|diff_entry| match diff_entry {
|
.filter_map(|diff_entry| match diff_entry {
|
||||||
Diff::Added(path, entry) => Some((path, entry)),
|
Diff::Added(path, entry) => Some((path, entry, vec![])),
|
||||||
Diff::Removed(..) => None,
|
Diff::Removed(..) => None,
|
||||||
Diff::Changed(path, _, entry) => Some((path, entry)),
|
Diff::Changed(path, parent_entry, entry) => Some((path, entry, vec![parent_entry])),
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
.and_then({
|
.and_then({
|
||||||
@ -452,7 +477,7 @@ where
|
|||||||
let paths: Vec<_> = new_entries
|
let paths: Vec<_> = new_entries
|
||||||
.clone()
|
.clone()
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(path, _)| path)
|
.map(|(path, _, _)| path)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let futs = diff_against.into_iter().skip(1).map(move |p| {
|
let futs = diff_against.into_iter().skip(1).map(move |p| {
|
||||||
@ -463,17 +488,21 @@ where
|
|||||||
future::join_all(futs).map(move |entries_in_parents| {
|
future::join_all(futs).map(move |entries_in_parents| {
|
||||||
let mut res = vec![];
|
let mut res = vec![];
|
||||||
|
|
||||||
for (path, unode) in new_entries {
|
for (path, unode, mut parent_entries) in new_entries {
|
||||||
let mut new_entry = true;
|
let mut new_entry = true;
|
||||||
for p in &entries_in_parents {
|
for p in &entries_in_parents {
|
||||||
if p.get(&path) == Some(&unode) {
|
if let Some(parent_entry) = p.get(&path) {
|
||||||
new_entry = false;
|
if parent_entry == &unode {
|
||||||
break;
|
new_entry = false;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
parent_entries.push(parent_entry.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if new_entry {
|
if new_entry {
|
||||||
res.push((path, unode));
|
res.push((path, unode, parent_entries));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -486,6 +515,7 @@ where
|
|||||||
.left_stream(),
|
.left_stream(),
|
||||||
None => mf_id
|
None => mf_id
|
||||||
.list_all_entries(ctx.clone(), store.clone())
|
.list_all_entries(ctx.clone(), store.clone())
|
||||||
|
.map(|(path, entry)| (path, entry, vec![]))
|
||||||
.right_stream(),
|
.right_stream(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user