diff --git a/eden/mononoke/blobimport_lib/src/lib.rs b/eden/mononoke/blobimport_lib/src/lib.rs
index 2c2290ae7c..21ebf88f58 100644
--- a/eden/mononoke/blobimport_lib/src/lib.rs
+++ b/eden/mononoke/blobimport_lib/src/lib.rs
@@ -51,7 +51,7 @@ fn derive_data_for_csids(
     let derivations = FuturesUnordered::new();
 
     for data_type in derived_data_types {
-        let derived_utils = derived_data_utils(ctx.clone(), repo.clone(), data_type)?;
+        let derived_utils = derived_data_utils(repo.clone(), data_type)?;
 
         derivations.push(
             derived_utils
diff --git a/eden/mononoke/blobrepo/factory/src/lib.rs b/eden/mononoke/blobrepo/factory/src/lib.rs
index 83e43b44b3..7df3870ef5 100644
--- a/eden/mononoke/blobrepo/factory/src/lib.rs
+++ b/eden/mononoke/blobrepo/factory/src/lib.rs
@@ -30,6 +30,7 @@ use filestore::FilestoreConfig;
 use fsnodes::RootFsnodeId;
 use futures::{future::IntoFuture, Future};
 use futures_ext::{try_boxfuture, BoxFuture, FutureExt};
+use git_types::TreeHandle;
 use maplit::btreeset;
 use memblob::EagerMemblob;
 use metaconfig_types::{self, DerivedDataConfig, FilestoreParams, Redaction, StorageConfig};
@@ -262,12 +263,13 @@ pub fn init_all_derived_data() -> DerivedDataConfig {
     DerivedDataConfig {
         scuba_table: None,
         derived_data_types: btreeset! {
-            RootUnodeManifestId::NAME.to_string(),
+            BlameRoot::NAME.to_string(),
+            FilenodesOnlyPublic::NAME.to_string(),
             RootFastlog::NAME.to_string(),
             RootFsnodeId::NAME.to_string(),
-            BlameRoot::NAME.to_string(),
             RootDeletedManifestId::NAME.to_string(),
-            FilenodesOnlyPublic::NAME.to_string(),
+            RootUnodeManifestId::NAME.to_string(),
+            TreeHandle::NAME.to_string(),
         },
     }
 }
diff --git a/eden/mononoke/blobrepo/src/repo.rs b/eden/mononoke/blobrepo/src/repo.rs
index 447d89690f..a4a3b590d3 100644
--- a/eden/mononoke/blobrepo/src/repo.rs
+++ b/eden/mononoke/blobrepo/src/repo.rs
@@ -2049,3 +2049,16 @@ impl DangerousOverride> for BlobRepo {
         }
     }
 }
+
+impl DangerousOverride<DerivedDataConfig> for BlobRepo {
+    fn dangerous_override<F>(&self, modify: F) -> Self
+    where
+        F: FnOnce(DerivedDataConfig) -> DerivedDataConfig,
+    {
+        let derived_data_config = modify(self.derived_data_config.clone());
+        BlobRepo {
+            derived_data_config,
+            ..self.clone()
+        }
+    }
+}
diff --git a/eden/mononoke/cmds/admin/derived_data.rs b/eden/mononoke/cmds/admin/derived_data.rs
index 23c6f4bb0e..5e599e0876 100644
--- a/eden/mononoke/cmds/admin/derived_data.rs
+++ b/eden/mononoke/cmds/admin/derived_data.rs
@@ -85,7 +85,7 @@ async fn check_derived_data_exists(
     derived_data_type: String,
     hashes_or_bookmarks: Vec<String>,
 ) -> Result<(), SubcommandError> {
-    let derived_utils = derived_data_utils(ctx.clone(), repo.clone(), derived_data_type)?;
+    let derived_utils = derived_data_utils(repo.clone(), derived_data_type)?;
 
     let cs_id_futs: Vec<_> = hashes_or_bookmarks
         .into_iter()
diff --git a/eden/mononoke/cmds/backfill_derived_data.rs b/eden/mononoke/cmds/backfill_derived_data.rs
index 19e303aa01..f2fed30054 100644
--- a/eden/mononoke/cmds/backfill_derived_data.rs
+++ b/eden/mononoke/cmds/backfill_derived_data.rs
@@ -27,7 +27,7 @@ use context::CoreContext;
 use dbbookmarks::SqlBookmarks;
 use deleted_files_manifest::RootDeletedManifestId;
 use derived_data::BonsaiDerived;
-use derived_data_utils::{derived_data_utils, POSSIBLE_DERIVED_TYPES};
+use derived_data_utils::{derived_data_utils, derived_data_utils_unsafe, POSSIBLE_DERIVED_TYPES};
 use fastlog::{fetch_parent_root_unodes, RootFastlog};
 use fbinit::FacebookInit;
 use fsnodes::RootFsnodeId;
@@ -391,8 +391,7 @@ fn subcommand_backfill<P: AsRef<Path>>(
     regenerate: bool,
     prefetched_commits_path: P,
 ) -> BoxFuture<(), Error> {
-    let derived_utils = try_boxfuture!(derived_data_utils(
-        ctx.clone(),
+    let derived_utils = try_boxfuture!(derived_data_utils_unsafe(
         repo.clone(),
         derived_data_type.clone(),
     ));
@@ -589,7 +588,7 @@ fn subcommand_tail(
             } else {
                 repo.clone()
             };
-            let derive = derived_data_utils(ctx.clone(), repo.clone(), name)?;
+            let derive = derived_data_utils(repo.clone(), name)?;
             let stats = stats_constructor(derive.name());
             Ok((derive, maybe_unredacted_repo, Arc::new(stats)))
         })
@@ -689,7 +688,7 @@ fn subcommand_single(
     derived_data_type: String,
 ) -> impl Future {
     let repo = repo.dangerous_override(|_| Arc::new(DummyLease {}) as Arc);
-    let derived_utils = match derived_data_utils(ctx.clone(), repo.clone(), derived_data_type) {
+    let derived_utils = match derived_data_utils(repo.clone(), derived_data_type) {
         Ok(derived_utils) => derived_utils,
         Err(error) => return future::err(error).left_future(),
     };
@@ -806,8 +805,7 @@ mod tests {
         let maybe_bcs_id = runtime.block_on(repo.get_bonsai_from_hg(ctx.clone(), hg_cs_id))?;
         let bcs_id = maybe_bcs_id.unwrap();
 
-        let derived_utils =
-            derived_data_utils(ctx.clone(), repo.clone(), RootUnodeManifestId::NAME)?;
+        let derived_utils = derived_data_utils(repo.clone(), RootUnodeManifestId::NAME)?;
         runtime.block_on(derived_utils.derive_batch(ctx.clone(), repo.clone(), vec![bcs_id]))?;
 
         Ok(())
@@ -832,8 +830,7 @@ mod tests {
            batch.push(maybe_bcs_id.unwrap());
         }
 
-        let derived_utils =
-            derived_data_utils(ctx.clone(), repo.clone(), RootUnodeManifestId::NAME)?;
+        let derived_utils = derived_data_utils(repo.clone(), RootUnodeManifestId::NAME)?;
         let pending =
             runtime.block_on(derived_utils.pending(ctx.clone(), repo.clone(), batch.clone()))?;
         assert_eq!(pending.len(), hg_cs_ids.len());
@@ -863,8 +860,7 @@ mod tests {
             runtime.block_on(repo.get_bonsai_from_hg(ctx.clone(), first_hg_cs_id))?;
         let bcs_id = maybe_bcs_id.unwrap();
 
-        let derived_utils =
-            derived_data_utils(ctx.clone(), repo.clone(), RootUnodeManifestId::NAME)?;
+        let derived_utils = derived_data_utils(repo.clone(), RootUnodeManifestId::NAME)?;
         let res =
             runtime.block_on(derived_utils.derive_batch(ctx.clone(), repo.clone(), vec![bcs_id]));
         // Deriving should fail because blobstore writes fail
diff --git a/eden/mononoke/derived_data/src/derive_impl.rs b/eden/mononoke/derived_data/src/derive_impl.rs
index 12168212cd..b8f669c932 100644
--- a/eden/mononoke/derived_data/src/derive_impl.rs
+++ b/eden/mononoke/derived_data/src/derive_impl.rs
@@ -5,8 +5,8 @@
  * GNU General Public License version 2.
  */
 
-use crate::{BonsaiDerived, BonsaiDerivedMapping};
-use anyhow::Error;
+use crate::{BonsaiDerived, BonsaiDerivedMapping, Mode};
+use anyhow::{format_err, Error};
 use blobrepo::BlobRepo;
 use blobstore::Loadable;
 use cloned::cloned;
@@ -15,7 +15,7 @@ use futures::{
     future::{self, Loop},
     stream, Future, IntoFuture, Stream,
 };
-use futures_ext::{bounded_traversal, BoxFuture, FutureExt, StreamExt};
+use futures_ext::{bounded_traversal, try_boxfuture, BoxFuture, FutureExt, StreamExt};
 use futures_stats::Timed;
 use lock_ext::LockExt;
 use mononoke_types::ChangesetId;
@@ -76,8 +76,9 @@ pub fn derive_impl<
     repo: BlobRepo,
     derived_mapping: Mapping,
     start_csid: ChangesetId,
+    mode: Mode,
 ) -> impl Future<Item = Derived, Error = Error> {
-    find_underived(&ctx, &repo, &derived_mapping, &start_csid, None)
+    find_underived(&ctx, &repo, &derived_mapping, &start_csid, None, mode)
         .map({
             cloned!(ctx);
             move |commits_not_derived_to_parents| {
@@ -172,14 +173,20 @@
         .and_then(move |_| fetch_derived_may_panic(ctx, start_csid, derived_mapping))
 }
 
-fn log_if_disabled<Derived: BonsaiDerived>(repo: &BlobRepo) {
+fn fail_if_disabled<Derived: BonsaiDerived>(repo: &BlobRepo) -> Result<(), Error> {
     if !repo
         .get_derived_data_config()
         .derived_data_types
         .contains(Derived::NAME)
     {
         STATS::derived_data_disabled.add_value(1, (repo.get_repoid().id(), Derived::NAME));
+        return Err(format_err!(
+            "{} is not enabled for repo {}",
+            Derived::NAME,
+            repo.get_repoid()
+        ));
     }
+    Ok(())
 }
 
 pub(crate) fn find_underived<
@@ -191,8 +198,12 @@ pub(crate) fn find_underived<
     derived_mapping: &Mapping,
     start_csid: &ChangesetId,
     limit: Option<u64>,
+    mode: Mode,
 ) -> impl Future<Item = HashMap<ChangesetId, Vec<ChangesetId>>, Error = Error> {
-    log_if_disabled::<Derived>(repo);
+    if mode == Mode::OnlyIfEnabled {
+        try_boxfuture!(fail_if_disabled::<Derived>(repo));
+    }
+
     let changeset_fetcher = repo.get_changeset_fetcher();
     // This is necessary to avoid visiting the same commit a lot of times in mergy repos
     let visited: Arc<Mutex<HashSet<ChangesetId>>> = Arc::new(Mutex::new(HashSet::new()));
@@ -236,6 +247,7 @@
         .traced(&ctx.trace(), "derive::find_dependencies", None)
         .filter_map(|x| x)
         .collect_to()
+        .boxify()
 }
 
 // Panics if any of the parents is not derived yet
@@ -516,6 +528,7 @@ mod test {
     use lock_ext::LockExt;
     use maplit::hashmap;
     use mercurial_types::HgChangesetId;
+    use metaconfig_types::DerivedDataConfig;
     use mononoke_types::BonsaiChangeset;
     use revset::AncestorsNodeStream;
     use std::{
@@ -613,6 +626,13 @@ mod test {
     }
 
     fn derive_for_master(runtime: &mut Runtime, ctx: CoreContext, repo: BlobRepo) {
+        let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
+            derived_data_config
+                .derived_data_types
+                .insert(TestGenNum::NAME.to_string());
+            derived_data_config
+        });
+
         let master_book = BookmarkName::new("master").unwrap();
         let bcs_id = runtime
             .block_on(repo.get_bonsai_bookmark(ctx.clone(), &master_book))
@@ -653,12 +673,21 @@ mod test {
             .unwrap();
     }
 
+    fn init_linear(fb: FacebookInit) -> BlobRepo {
+        linear::getrepo(fb).dangerous_override(|mut derived_data_config: DerivedDataConfig| {
+            derived_data_config
+                .derived_data_types
+                .insert(TestGenNum::NAME.to_string());
+            derived_data_config
+        })
+    }
+
     #[fbinit::test]
     fn test_incomplete_maping(fb: FacebookInit) -> Result<(), Error> {
         let ctx = CoreContext::test_mock(fb);
         let mut runtime = Runtime::new()?;
-        let repo = linear::getrepo(fb);
+        let repo = init_linear(fb);
 
         runtime.block_on(
             async move {
                 // This is the parent of the root commit
@@ -700,7 +729,7 @@ mod test {
         let ctx = CoreContext::test_mock(fb);
         let mut runtime = Runtime::new()?;
-        let repo = linear::getrepo(fb);
+        let repo = init_linear(fb);
 
         runtime.block_on(
             async move {
                 // This is the parent of the root commit
@@ -782,9 +811,9 @@ mod test {
     #[fbinit::test]
     fn test_leases(fb: FacebookInit) -> Result<(), Error> {
         let ctx = CoreContext::test_mock(fb);
-        let repo = linear::getrepo(fb);
-        let mapping = TestGenNum::mapping(&ctx, &repo);
         let mut runtime = Runtime::new()?;
+        let repo = init_linear(fb);
+        let mapping = TestGenNum::mapping(&ctx, &repo);
 
         let hg_csid = HgChangesetId::from_str("2d7d4ba9ce0a6ffd222de7785b249ead9c51c536")?;
         let csid = runtime
@@ -878,7 +907,7 @@ mod test {
     fn test_always_failing_lease(fb: FacebookInit) -> Result<(), Error> {
         let ctx = CoreContext::test_mock(fb);
         let repo =
-            linear::getrepo(fb).dangerous_override(|_| Arc::new(FailingLease) as Arc);
+            init_linear(fb).dangerous_override(|_| Arc::new(FailingLease) as Arc);
         let mapping = TestGenNum::mapping(&ctx, &repo);
         let mut runtime = Runtime::new()?;
diff --git a/eden/mononoke/derived_data/src/lib.rs b/eden/mononoke/derived_data/src/lib.rs
index 936a90294b..d3f71b59bd 100644
--- a/eden/mononoke/derived_data/src/lib.rs
+++ b/eden/mononoke/derived_data/src/lib.rs
@@ -21,6 +21,15 @@ use std::{
 
 pub mod derive_impl;
 
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum Mode {
+    /// This mode should almost always be preferred
+    OnlyIfEnabled,
+    /// This mode should rarely be used, perhaps only for backfilling type of derived data
+    /// which is not enabled in this repo yet
+    Unsafe,
+}
+
 /// Trait for the data that can be derived from bonsai changeset.
 /// Examples of that are hg changeset id, unodes root manifest id, git changeset ids etc
 pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone {
@@ -53,12 +62,35 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone {
     /// This function is the entrypoint for changeset derivation, it converts
     /// bonsai representation to derived one by calling derive_from_parents(), and saves mapping
     /// from csid -> BonsaiDerived in BonsaiDerivedMapping
+    ///
+    /// This function fails immediately if this type of derived data is not enabled for this repo.
     fn derive(ctx: CoreContext, repo: BlobRepo, csid: ChangesetId) -> BoxFuture<Self, Error> {
         let mapping = Self::mapping(&ctx, &repo);
-        derive_impl::derive_impl::<Self, Self::Mapping>(ctx, repo, mapping, csid).boxify()
+        derive_impl::derive_impl::<Self, Self::Mapping>(
+            ctx,
+            repo,
+            mapping,
+            csid,
+            Mode::OnlyIfEnabled,
+        )
+        .boxify()
+    }
+
+    /// Derives derived data even if it's disabled in the config. Should normally
+    /// be used only for backfilling.
+    fn derive_with_mode(
+        ctx: CoreContext,
+        repo: BlobRepo,
+        csid: ChangesetId,
+        mode: Mode,
+    ) -> BoxFuture<Self, Error> {
+        let mapping = Self::mapping(&ctx, &repo);
+        derive_impl::derive_impl::<Self, Self::Mapping>(ctx, repo, mapping, csid, mode).boxify()
     }
 
     /// Returns min(number of ancestors of `csid` to be derived, `limit`)
+    ///
+    /// This function fails immediately if derived data is not enabled for this repo.
     fn count_underived(
         ctx: &CoreContext,
         repo: &BlobRepo,
@@ -66,9 +98,16 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone {
         limit: u64,
     ) -> BoxFuture<u64, Error> {
         let mapping = Self::mapping(&ctx, &repo);
-        derive_impl::find_underived::<Self, Self::Mapping>(ctx, repo, &mapping, csid, Some(limit))
-            .map(|underived| underived.len() as u64)
-            .boxify()
+        derive_impl::find_underived::<Self, Self::Mapping>(
+            ctx,
+            repo,
+            &mapping,
+            csid,
+            Some(limit),
+            Mode::OnlyIfEnabled,
+        )
+        .map(|underived| underived.len() as u64)
+        .boxify()
     }
 }
diff --git a/eden/mononoke/derived_data/utils/lib.rs b/eden/mononoke/derived_data/utils/lib.rs
index d00e7874dc..6384586f25 100644
--- a/eden/mononoke/derived_data/utils/lib.rs
+++ b/eden/mononoke/derived_data/utils/lib.rs
@@ -14,7 +14,8 @@ use cloned::cloned;
 use context::CoreContext;
 use deleted_files_manifest::{RootDeletedManifestId, RootDeletedManifestMapping};
 use derived_data::{
-    derive_impl::derive_impl, BonsaiDerived, BonsaiDerivedMapping, RegenerateMapping,
+    derive_impl::derive_impl, BonsaiDerived, BonsaiDerivedMapping, Mode as DeriveMode,
+    RegenerateMapping,
 };
 use derived_data_filenodes::{FilenodesOnlyPublic, FilenodesOnlyPublicMapping};
 use fastlog::{RootFastlog, RootFastlogMapping};
@@ -73,12 +74,13 @@ pub trait DerivedUtils: Send + Sync + 'static {
 #[derive(Clone)]
 struct DerivedUtilsFromMapping<M> {
     mapping: RegenerateMapping<M>,
+    mode: DeriveMode,
 }
 
 impl<M> DerivedUtilsFromMapping<M> {
-    fn new(mapping: M) -> Self {
+    fn new(mapping: M, mode: DeriveMode) -> Self {
         let mapping = RegenerateMapping::new(mapping);
-        Self { mapping }
+        Self { mapping, mode }
     }
 }
@@ -93,7 +95,7 @@ where
         repo: BlobRepo,
         csid: ChangesetId,
     ) -> BoxFuture<String, Error> {
-        <M::Value as BonsaiDerived>::derive(ctx.clone(), repo, csid)
+        <M::Value as BonsaiDerived>::derive_with_mode(ctx.clone(), repo, csid, self.mode)
             .map(|result| format!("{:?}", result))
             .boxify()
     }
@@ -132,6 +134,7 @@ where
                 repo.clone(),
                 in_memory_mapping.clone(),
                 csid,
+                DeriveMode::Unsafe,
             )
             .map(|_| ())
         }
@@ -236,38 +239,52 @@ where
 }
 
 pub fn derived_data_utils(
-    _ctx: CoreContext,
     repo: BlobRepo,
     name: impl AsRef<str>,
+) -> Result<Arc<dyn DerivedUtils>, Error> {
+    derived_data_utils_impl(repo, name, DeriveMode::OnlyIfEnabled)
+}
+
+pub fn derived_data_utils_unsafe(
+    repo: BlobRepo,
+    name: impl AsRef<str>,
+) -> Result<Arc<dyn DerivedUtils>, Error> {
+    derived_data_utils_impl(repo, name, DeriveMode::Unsafe)
+}
+
+fn derived_data_utils_impl(
+    repo: BlobRepo,
+    name: impl AsRef<str>,
+    mode: DeriveMode,
 ) -> Result<Arc<dyn DerivedUtils>, Error> {
     match name.as_ref() {
         RootUnodeManifestId::NAME => {
             let mapping = RootUnodeManifestMapping::new(repo.get_blobstore());
-            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
+            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
         }
         RootFastlog::NAME => {
             let mapping = RootFastlogMapping::new(repo.get_blobstore().boxed());
-            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
+            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
         }
         MappedHgChangesetId::NAME => {
             let mapping = HgChangesetIdMapping::new(&repo);
-            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
+            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
         }
         RootFsnodeId::NAME => {
             let mapping = RootFsnodeMapping::new(repo.get_blobstore());
-            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
+            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
        }
         BlameRoot::NAME => {
             let mapping = BlameRootMapping::new(repo.get_blobstore().boxed());
-            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
+            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
         }
         RootDeletedManifestId::NAME => {
             let mapping = RootDeletedManifestMapping::new(repo.get_blobstore());
-            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
+            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
         }
         FilenodesOnlyPublic::NAME => {
             let mapping = FilenodesOnlyPublicMapping::new(repo);
-            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
+            Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
         }
         name => Err(format_err!("Unsupported derived data type: {}", name)),
     }
 }
diff --git a/eden/mononoke/tests/integration/library.sh b/eden/mononoke/tests/integration/library.sh
index 041e1116b9..06524334ce 100644
--- a/eden/mononoke/tests/integration/library.sh
+++ b/eden/mononoke/tests/integration/library.sh
@@ -725,6 +725,18 @@ allow_writes = ${INFINITEPUSH_ALLOW_WRITES:-true}
 ${namespace}
 CONFIG
 fi
+
+if [[ -v ENABLED_DERIVED_DATA ]]; then
+  cat >> "repos/$reponame/server.toml" <> "repos/$reponame/server.toml" <
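
The gating this patch introduces, sketched as a minimal, self-contained Rust example. The names below (Mode, DerivedDataConfig, fail_if_disabled, derive) mirror the patch but are simplified stand-ins, not the real Mononoke API: derivation is refused unless the type is listed in the repo's derived-data config, except when the caller explicitly opts into the unsafe/backfill mode.

use std::collections::BTreeSet;

#[derive(Copy, Clone, PartialEq, Eq)]
enum Mode {
    OnlyIfEnabled,
    Unsafe,
}

struct DerivedDataConfig {
    derived_data_types: BTreeSet<String>,
}

// Mirrors fail_if_disabled above: refuse to derive a type the config does not enable.
fn fail_if_disabled(config: &DerivedDataConfig, name: &str) -> Result<(), String> {
    if !config.derived_data_types.contains(name) {
        return Err(format!("{} is not enabled for this repo", name));
    }
    Ok(())
}

// Mirrors derive_impl: the check is skipped only when the caller explicitly
// asks for the unsafe (backfill) mode.
fn derive(config: &DerivedDataConfig, name: &str, mode: Mode) -> Result<(), String> {
    if mode == Mode::OnlyIfEnabled {
        fail_if_disabled(config, name)?;
    }
    // ... the actual derivation would run here ...
    Ok(())
}

fn main() {
    let config = DerivedDataConfig {
        derived_data_types: std::iter::once("unodes".to_string()).collect(),
    };
    // Regular callers fail fast on a disabled type ...
    assert!(derive(&config, "blame", Mode::OnlyIfEnabled).is_err());
    // ... while a backfiller can opt in and derive it anyway.
    assert!(derive(&config, "blame", Mode::Unsafe).is_ok());
    assert!(derive(&config, "unodes", Mode::OnlyIfEnabled).is_ok());
}

The same split shows up in the public entry points of the patch: derived_data_utils keeps the OnlyIfEnabled check for normal callers (admin tooling, tailers, blobimport), while derived_data_utils_unsafe, used by the backfill subcommand, opts into Unsafe mode so a not-yet-enabled type can be backfilled before it is turned on in the config.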