derived_data: don't use default mapping in batch derivation

Summary:
Now that the mapping is separated from `BonsaiDerivable`, it becomes clear
where batch derivation incorrectly uses the default mapping, rather than the
mapping that was provided for batch derivation.

For example, when backfilling a v2 of one of these derived data types, we
could accidentally base it on a v1 parent obtained from the default mapping.

Instead, ensure that batch derivation doesn't go through `BonsaiDerived`, and
thus can't use the default mapping.
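
To make the failure mode concrete, here is a minimal sketch in plain Rust.
The types are simplified stand-ins, not Mononoke's real
`BonsaiDerivable`/`BonsaiDerivedMapping` API: once the mapping is an explicit
parameter, batch derivation can only consult the mapping it was handed, so a
v2 backfill can never silently read a v1 parent.

// Simplified stand-ins for the mapping-injection pattern; not Mononoke code.
use std::collections::HashMap;

/// A "mapping" stores already-derived values keyed by changeset id.
trait Mapping {
    fn get(&self, cs_id: u32) -> Option<u64>;
}

/// Hypothetical versioned store: v1 and v2 keep independent data.
struct VersionedMapping {
    version: u32,
    store: HashMap<u32, u64>,
}

impl Mapping for VersionedMapping {
    fn get(&self, cs_id: u32) -> Option<u64> {
        self.store.get(&cs_id).copied()
    }
}

/// Fixed shape: the only mapping in scope is the one the caller passed in,
/// so parent lookups cannot fall back to a different (default) version.
fn derive_batch<M: Mapping>(mapping: &M, batch: &[u32]) -> Vec<u64> {
    batch
        .iter()
        .map(|&cs| match mapping.get(cs) {
            Some(v) => v,              // parent already derived under *this* mapping
            None => u64::from(cs) * 2, // placeholder "derivation"
        })
        .collect()
}

fn main() {
    let v2 = VersionedMapping {
        version: 2,
        store: HashMap::new(),
    };
    println!("v{} results: {:?}", v2.version, derive_batch(&v2, &[1, 2, 3]));
}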

Reviewed By: krallin

Differential Revision: D25371963

fbshipit-source-id: fb71e1f1c4bd7a112d3099e0e5c5c7111d457cd2
Authored by Mark Juggurnauth-Thomas on 2020-12-14 09:22:57 -08:00, committed by Facebook GitHub Bot
parent e2cd66da84
commit 5e549d348f
7 changed files with 144 additions and 35 deletions

--- file 1 of 7 (path not shown) ---

@@ -13,7 +13,7 @@ use borrowed::borrowed;
 use cloned::cloned;
 use context::CoreContext;
 use derived_data::batch::split_batch_in_linear_stacks;
-use derived_data::BonsaiDerived;
+use derived_data::{derive_impl, BonsaiDerivedMapping};
 use futures::stream::{FuturesOrdered, TryStreamExt};
 use mononoke_types::{ChangesetId, FsnodeId};
@@ -48,11 +48,15 @@ use crate::RootFsnodeId;
 ///
 /// Fsnode derivation can be cpu-bounded, and the speed up is achieved by spawning derivation on different
 /// tokio tasks - this allows us to use more cpu.
-pub async fn derive_fsnode_in_batch(
+pub async fn derive_fsnode_in_batch<Mapping>(
     ctx: &CoreContext,
     repo: &BlobRepo,
+    mapping: &Mapping,
     batch: Vec<ChangesetId>,
-) -> Result<HashMap<ChangesetId, FsnodeId>, Error> {
+) -> Result<HashMap<ChangesetId, FsnodeId>, Error>
+where
+    Mapping: BonsaiDerivedMapping<Value = RootFsnodeId> + 'static,
+{
     let linear_stacks = split_batch_in_linear_stacks(ctx, repo, batch).await?;
     let mut res = HashMap::new();
     for linear_stack in linear_stacks {
@@ -67,7 +71,11 @@ pub async fn derive_fsnode_in_batch(
                 async move {
                     match res.get(&p) {
                         Some(fsnode_id) => Ok::<_, Error>(*fsnode_id),
-                        None => Ok(RootFsnodeId::derive(ctx, repo, p).await?.into_fsnode_id()),
+                        None => Ok(derive_impl::derive_impl::<RootFsnodeId, Mapping>(
+                            ctx, repo, mapping, p,
+                        )
+                        .await?
+                        .into_fsnode_id()),
                     }
                 }
             })
@@ -105,6 +113,7 @@ pub async fn derive_fsnode_in_batch(
 #[cfg(test)]
 mod test {
     use super::*;
+    use derived_data::BonsaiDerived;
     use fbinit::FacebookInit;
     use fixtures::linear;
     use futures::compat::Stream01CompatExt;
@@ -118,14 +127,14 @@ mod test {
         let repo = linear::getrepo(fb).await;
         let master_cs_id = resolve_cs_id(&ctx, &repo, "master").await?;
-        let cs_ids =
+        let mapping = RootFsnodeId::default_mapping(&ctx, &repo)?;
+        let mut cs_ids =
             AncestorsNodeStream::new(ctx.clone(), &repo.get_changeset_fetcher(), master_cs_id)
                 .compat()
                 .try_collect::<Vec<_>>()
                 .await?;
-        let fsnode_ids =
-            derive_fsnode_in_batch(&ctx, &repo, cs_ids.clone().into_iter().rev().collect())
-                .await?;
+        cs_ids.reverse();
+        let fsnode_ids = derive_fsnode_in_batch(&ctx, &repo, &mapping, cs_ids).await?;
         fsnode_ids.get(&master_cs_id).unwrap().clone()
     };
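
The doc comment in the hunk above explains that fsnode derivation is
cpu-bound and is sped up by spawning work onto separate tokio tasks,
collected with `FuturesOrdered`. The following is a self-contained sketch of
that shape, with placeholder types and a toy computation standing in for real
fsnode derivation; it assumes only the `tokio` and `futures` crates and is
not Mononoke's actual code.

// Sketch of the batch-derivation concurrency pattern, not Mononoke's code.
use futures::stream::{FuturesOrdered, TryStreamExt};
use std::collections::HashMap;

type ChangesetId = u32;
type FsnodeId = u64;

// Stand-in for the real, cpu-heavy per-changeset derivation.
async fn derive_one(cs: ChangesetId) -> Result<FsnodeId, std::io::Error> {
    Ok(u64::from(cs) * 2)
}

async fn derive_in_batch(
    batch: Vec<ChangesetId>,
) -> Result<HashMap<ChangesetId, FsnodeId>, std::io::Error> {
    let mut futs = FuturesOrdered::new();
    for &cs in &batch {
        // Spawning puts each derivation on its own tokio task, so independent
        // items can run on different cpus; FuturesOrdered keeps the results
        // in submission order.
        futs.push(async move {
            tokio::task::spawn(derive_one(cs))
                .await
                .expect("derivation task panicked")
        });
    }
    let derived: Vec<FsnodeId> = futs.try_collect().await?;
    Ok(batch.into_iter().zip(derived).collect())
}

#[tokio::main]
async fn main() -> Result<(), std::io::Error> {
    println!("{:?}", derive_in_batch(vec![1, 2, 3]).await?);
    Ok(())
}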

--- file 2 of 7 (path not shown) ---

@@ -96,7 +96,7 @@ impl BonsaiDerivable for RootFsnodeId {
     where
         BatchMapping: BonsaiDerivedMapping<Value = Self> + Send + Sync + Clone + 'static,
     {
-        let derived = derive_fsnode_in_batch(ctx, repo, csids.clone()).await?;
+        let derived = derive_fsnode_in_batch(ctx, repo, mapping, csids.clone()).await?;
         stream::iter(derived.into_iter().map(|(cs_id, derived)| async move {
             let derived = RootFsnodeId(derived);
--- file 3 of 7 (path not shown) ---

@@ -13,7 +13,7 @@ use borrowed::borrowed;
 use cloned::cloned;
 use context::CoreContext;
 use derived_data::batch::split_batch_in_linear_stacks;
-use derived_data::BonsaiDerived;
+use derived_data::{derive_impl, BonsaiDerivedMapping};
 use futures::stream::{FuturesOrdered, TryStreamExt};
 use mononoke_types::{ChangesetId, SkeletonManifestId};
@@ -26,11 +26,15 @@ use crate::RootSkeletonManifestId;
 ///
 /// This is the same mechanism as fsnodes, see `derive_fsnode_in_batch` for
 /// more details.
-pub async fn derive_skeleton_manifests_in_batch(
+pub async fn derive_skeleton_manifests_in_batch<Mapping>(
     ctx: &CoreContext,
     repo: &BlobRepo,
+    mapping: &Mapping,
     batch: Vec<ChangesetId>,
-) -> Result<HashMap<ChangesetId, SkeletonManifestId>, Error> {
+) -> Result<HashMap<ChangesetId, SkeletonManifestId>, Error>
+where
+    Mapping: BonsaiDerivedMapping<Value = RootSkeletonManifestId> + 'static,
+{
     let linear_stacks = split_batch_in_linear_stacks(ctx, repo, batch).await?;
     let mut res = HashMap::new();
     for linear_stack in linear_stacks {
@@ -46,9 +50,11 @@ pub async fn derive_skeleton_manifests_in_batch(
                 async move {
                     match res.get(&p) {
                         Some(sk_mf_id) => Ok::<_, Error>(*sk_mf_id),
-                        None => Ok(RootSkeletonManifestId::derive(ctx, repo, p)
-                            .await?
-                            .into_skeleton_manifest_id()),
+                        None => Ok(derive_impl::derive_impl::<RootSkeletonManifestId, Mapping>(
+                            ctx, repo, mapping, p,
+                        )
+                        .await?
+                        .into_skeleton_manifest_id()),
                     }
                 }
             })
@@ -92,6 +98,7 @@ pub async fn derive_skeleton_manifests_in_batch(
 #[cfg(test)]
 mod test {
     use super::*;
+    use derived_data::BonsaiDerived;
     use fbinit::FacebookInit;
     use fixtures::linear;
     use futures::compat::Stream01CompatExt;
@@ -105,17 +112,15 @@ mod test {
         let repo = linear::getrepo(fb).await;
         let master_cs_id = resolve_cs_id(&ctx, &repo, "master").await?;
-        let cs_ids =
+        let mapping = RootSkeletonManifestId::default_mapping(&ctx, &repo)?;
+        let mut cs_ids =
             AncestorsNodeStream::new(ctx.clone(), &repo.get_changeset_fetcher(), master_cs_id)
                 .compat()
                 .try_collect::<Vec<_>>()
                 .await?;
-        let sk_mf_ids = derive_skeleton_manifests_in_batch(
-            &ctx,
-            &repo,
-            cs_ids.clone().into_iter().rev().collect(),
-        )
-        .await?;
+        cs_ids.reverse();
+        let sk_mf_ids =
+            derive_skeleton_manifests_in_batch(&ctx, &repo, &mapping, cs_ids).await?;
         sk_mf_ids.get(&master_cs_id).unwrap().clone()
     };

--- file 4 of 7 (path not shown) ---

@@ -98,7 +98,7 @@ impl BonsaiDerivable for RootSkeletonManifestId {
     where
         BatchMapping: BonsaiDerivedMapping<Value = Self> + Send + Sync + Clone + 'static,
     {
-        let derived = derive_skeleton_manifests_in_batch(ctx, repo, csids.clone()).await?;
+        let derived = derive_skeleton_manifests_in_batch(ctx, repo, mapping, csids.clone()).await?;
         stream::iter(derived.into_iter().map(|(cs_id, derived)| async move {
             let derived = RootSkeletonManifestId(derived);
--- file 5 of 7 (path not shown) ---

@@ -171,7 +171,7 @@ fn should_log_slow_derivation(duration: Duration) -> bool {
     duration > Duration::from_secs(threshold)
 }
 
-pub(crate) async fn find_topo_sorted_underived<
+pub async fn find_topo_sorted_underived<
     Derivable: BonsaiDerivable,
     Mapping: BonsaiDerivedMapping<Value = Derivable> + Send + Sync + Clone + 'static,
     Changesets: IntoIterator<Item = ChangesetId>,

--- file 6 of 7 (path not shown) ---

@@ -42,6 +42,7 @@ slog = { version = "2.5", features = ["max_level_debug"] }
 blobrepo_factory = { path = "../../blobrepo/factory" }
 bookmarks = { path = "../../bookmarks" }
 fixtures = { path = "../../tests/fixtures" }
+metaconfig_types = { path = "../../metaconfig/types" }
 tests_utils = { path = "../../tests/utils" }
 fbinit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" }
 maplit = "1.0"

--- file 7 of 7 (path not shown) ---

@@ -19,8 +19,8 @@ use cloned::cloned;
 use context::{CoreContext, SessionContainer};
 use deleted_files_manifest::{RootDeletedManifestId, RootDeletedManifestMapping};
 use derived_data::{
-    derive_impl::derive_impl, BlobstoreExistsMapping, BlobstoreRootIdMapping, BonsaiDerivable,
-    BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig, RegenerateMapping,
+    derive_impl, BlobstoreExistsMapping, BlobstoreRootIdMapping, BonsaiDerivable,
+    BonsaiDerivedMapping, DerivedDataTypesConfig, RegenerateMapping,
 };
 use derived_data_filenodes::{FilenodesOnlyPublic, FilenodesOnlyPublicMapping};
 use fastlog::{RootFastlog, RootFastlogMapping};
@@ -186,7 +186,7 @@ impl<M> DerivedUtilsFromMapping<M> {
 impl<M> DerivedUtils for DerivedUtilsFromMapping<M>
 where
     M: BonsaiDerivedMapping + Clone + 'static,
-    M::Value: BonsaiDerived + std::fmt::Debug,
+    M::Value: BonsaiDerivable + std::fmt::Debug,
 {
     fn derive(
         &self,
@@ -200,7 +200,8 @@ where
         // even if it was already generated (see RegenerateMapping call).
         cloned!(self.mapping);
         async move {
-            let result = derive_impl::<M::Value, _>(&ctx, &repo, &mapping, csid).await?;
+            let result =
+                derive_impl::derive_impl::<M::Value, _>(&ctx, &repo, &mapping, csid).await?;
             Ok(format!("{:?}", result))
         }
         .boxed()
@@ -264,7 +265,8 @@ where
             let ctx = CoreContext::new_with_logger(ctx.fb, ctx.logger().clone());
             // derive each changeset sequentially
-            derive_impl::<M::Value, _>(&ctx, &repo, &in_memory_mapping, csid).await?;
+            derive_impl::derive_impl::<M::Value, _>(&ctx, &repo, &in_memory_mapping, csid)
+                .await?;
         }
     }
@@ -304,15 +306,17 @@ where
     ) -> Result<Option<BonsaiChangeset>, Error> {
         let mut underived_ancestors = vec![];
         for cs_id in csids {
-            underived_ancestors.push(M::Value::find_all_underived_ancestors(
+            underived_ancestors.push(derive_impl::find_topo_sorted_underived::<M::Value, _, _>(
                 ctx,
                 repo,
+                &self.mapping,
                 vec![*cs_id],
+                None,
             ));
         }
 
         let boxed_stream = stream::iter(underived_ancestors)
-            .map(Result::<_, DeriveError>::Ok)
+            .map(Result::<_, Error>::Ok)
             .try_buffer_unordered(100)
             // boxed() is necessary to avoid "one type is more general than the other" error
             .boxed();
@@ -910,14 +914,16 @@ pub fn find_underived_many(
 mod tests {
     use super::*;
     use bookmarks::BookmarkName;
+    use derived_data::BonsaiDerived;
     use fbinit::FacebookInit;
     use fixtures::merge_even;
-    use maplit::btreemap;
+    use maplit::{btreemap, hashset};
+    use metaconfig_types::UnodeVersion;
     use std::{
         collections::BTreeMap,
         sync::atomic::{AtomicUsize, Ordering},
     };
-    use tests_utils::drawdag::{changes, create_from_dag_with_changes};
+    use tests_utils::drawdag::create_from_dag;
 
     // decompose graph into map between node indices and list of nodes
     fn derive_graph_unpack(node: &DeriveGraph) -> (BTreeMap<usize, Vec<usize>>, Vec<DeriveGraph>) {
@@ -1113,8 +1119,7 @@ mod tests {
     async fn test_find_underived_many(fb: FacebookInit) -> Result<(), Error> {
         let ctx = CoreContext::test_mock(fb);
         let repo = blobrepo_factory::new_memblob_empty(None)?;
-        let dag = create_from_dag_with_changes(&ctx, &repo, "A-B-C", changes! {}).await?;
+        let dag = create_from_dag(&ctx, &repo, "A-B-C").await?;
         let a = *dag.get("A").unwrap();
         let b = *dag.get("B").unwrap();
         let c = *dag.get("C").unwrap();
@@ -1176,4 +1181,93 @@ mod tests {
         Ok::<_, Error>(())
     }
 
+    #[fbinit::compat_test]
+    async fn multiple_independent_mappings(fb: FacebookInit) -> Result<(), Error> {
+        let ctx = CoreContext::test_mock(fb);
+        let repo = blobrepo_factory::new_memblob_empty(None)?;
+        let dag = create_from_dag(&ctx, &repo, "A-B-C").await?;
+        let a = *dag.get("A").unwrap();
+        let b = *dag.get("B").unwrap();
+        let c = *dag.get("C").unwrap();
+
+        // Create utils for both versions of unodes.
+        let utils_v1 = derived_data_utils_impl(
+            &repo,
+            "unodes",
+            &DerivedDataTypesConfig {
+                types: hashset! { String::from("unodes") },
+                unode_version: UnodeVersion::V1,
+                ..Default::default()
+            },
+        )?;
+        let utils_v2 = derived_data_utils_impl(
+            &repo,
+            "unodes",
+            &DerivedDataTypesConfig {
+                types: hashset! { String::from("unodes") },
+                unode_version: UnodeVersion::V2,
+                ..Default::default()
+            },
+        )?;
+
+        assert_eq!(
+            utils_v1
+                .find_oldest_underived(&ctx, &repo, &vec![c])
+                .await?
+                .unwrap()
+                .get_changeset_id(),
+            a
+        );
+        assert_eq!(
+            utils_v2
+                .find_oldest_underived(&ctx, &repo, &vec![c])
+                .await?
+                .unwrap()
+                .get_changeset_id(),
+            a
+        );
+
+        // Derive V1 of A using the V1 utils. V2 of A should still be underived.
+        utils_v1.derive(ctx.clone(), repo.clone(), a).await?;
+        assert_eq!(
+            utils_v1
+                .find_oldest_underived(&ctx, &repo, &vec![c])
+                .await?
+                .unwrap()
+                .get_changeset_id(),
+            b
+        );
+        assert_eq!(
+            utils_v2
+                .find_oldest_underived(&ctx, &repo, &vec![c])
+                .await?
+                .unwrap()
+                .get_changeset_id(),
+            a
+        );
+
+        // Derive B directly, which should use the V2 mapping, as that is the
+        // version configured on the repo. V1 of B should still be underived.
+        RootUnodeManifestId::derive(&ctx, &repo, b).await?;
+        assert_eq!(
+            utils_v1
+                .find_oldest_underived(&ctx, &repo, &vec![c])
+                .await?
+                .unwrap()
+                .get_changeset_id(),
+            b
+        );
+        assert_eq!(
+            utils_v2
+                .find_oldest_underived(&ctx, &repo, &vec![c])
+                .await?
+                .unwrap()
+                .get_changeset_id(),
+            c
+        );
+
+        Ok(())
+    }
 }
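
A closing note on the `find_oldest_underived` hunk above: it collects
per-changeset ancestor lookups into a stream, runs up to 100 of them
concurrently with `try_buffer_unordered`, and the inline comment records that
`.boxed()` is needed to dodge a "one type is more general than the other"
inference error. Here is a minimal, runnable sketch of that combinator chain
with placeholder lookups; it assumes the `futures`, `tokio`, and `anyhow`
crates and simplified types, and is not the actual Mononoke code.

// Sketch of the try_buffer_unordered + boxed pattern, not Mononoke's code.
use anyhow::Error;
use futures::stream::{self, StreamExt, TryStreamExt};

// Placeholder: would return topo-sorted underived ancestors of cs_id.
async fn find_underived(cs_id: u32) -> Result<Vec<u32>, Error> {
    Ok(vec![cs_id])
}

#[tokio::main]
async fn main() -> Result<(), Error> {
    let csids = vec![1u32, 2, 3];
    let lookups = csids.into_iter().map(find_underived);

    // try_buffer_unordered(100) polls up to 100 lookups at once and yields
    // results as they finish; boxing the stream erases its exact type, which
    // is how the original code sidesteps the inference error it mentions.
    let all: Vec<Vec<u32>> = stream::iter(lookups)
        .map(Ok::<_, Error>)
        .try_buffer_unordered(100)
        .boxed()
        .try_collect()
        .await?;

    println!("{:?}", all);
    Ok(())
}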