diff --git a/eden/mononoke/blobrepo/factory/src/lib.rs b/eden/mononoke/blobrepo/factory/src/lib.rs index 531e7efd75..c95a5190f7 100644 --- a/eden/mononoke/blobrepo/factory/src/lib.rs +++ b/eden/mononoke/blobrepo/factory/src/lib.rs @@ -36,13 +36,13 @@ use filestore::FilestoreConfig; use fsnodes::RootFsnodeId; use futures::{compat::Future01CompatExt, future, try_join}; use git_types::TreeHandle; -use maplit::btreeset; +use maplit::hashset; use memblob::Memblob; use mercurial_derived_data::MappedHgChangesetId; use mercurial_mutation::{HgMutationStore, SqlHgMutationStoreBuilder}; use metaconfig_types::{ - self, CensoredScubaParams, DerivedDataConfig, FilestoreParams, Redaction, RepoConfig, - SegmentedChangelogConfig, StorageConfig, UnodeVersion, + self, CensoredScubaParams, DerivedDataConfig, DerivedDataTypesConfig, FilestoreParams, + Redaction, RepoConfig, SegmentedChangelogConfig, StorageConfig, UnodeVersion, }; use mononoke_types::RepositoryId; use newfilenodes::NewFilenodesBuilder; @@ -418,20 +418,23 @@ pub fn new_memblob_empty_with_id( pub fn init_all_derived_data() -> DerivedDataConfig { DerivedDataConfig { scuba_table: None, - derived_data_types: btreeset! { - BlameRoot::NAME.to_string(), - FilenodesOnlyPublic::NAME.to_string(), - ChangesetInfo::NAME.to_string(), - RootFastlog::NAME.to_string(), - RootFsnodeId::NAME.to_string(), - RootSkeletonManifestId::NAME.to_string(), - RootDeletedManifestId::NAME.to_string(), - RootUnodeManifestId::NAME.to_string(), - TreeHandle::NAME.to_string(), - MappedHgChangesetId::NAME.to_string(), + enabled: DerivedDataTypesConfig { + types: hashset! { + BlameRoot::NAME.to_string(), + FilenodesOnlyPublic::NAME.to_string(), + ChangesetInfo::NAME.to_string(), + RootFastlog::NAME.to_string(), + RootFsnodeId::NAME.to_string(), + RootSkeletonManifestId::NAME.to_string(), + RootDeletedManifestId::NAME.to_string(), + RootUnodeManifestId::NAME.to_string(), + TreeHandle::NAME.to_string(), + MappedHgChangesetId::NAME.to_string(), + }, + unode_version: UnodeVersion::V2, + ..Default::default() }, - unode_version: UnodeVersion::V2, - override_blame_filesize_limit: None, + backfilling: DerivedDataTypesConfig::default(), } } diff --git a/eden/mononoke/bookmarks/bookmarks_movement/src/affected_changesets.rs b/eden/mononoke/bookmarks/bookmarks_movement/src/affected_changesets.rs index e9f40f294e..d1dff09788 100644 --- a/eden/mononoke/bookmarks/bookmarks_movement/src/affected_changesets.rs +++ b/eden/mononoke/bookmarks/bookmarks_movement/src/affected_changesets.rs @@ -297,8 +297,7 @@ impl AffectedChangesets { && tunables().get_check_case_conflicts_on_bookmark_movement() && repo .get_derived_data_config() - .derived_data_types - .contains(RootSkeletonManifestId::NAME) + .is_enabled(RootSkeletonManifestId::NAME) { self.load_additional_changesets( ctx, diff --git a/eden/mononoke/bookmarks/warm_bookmarks_cache/lib.rs b/eden/mononoke/bookmarks/warm_bookmarks_cache/lib.rs index f88f47a262..43f982410b 100644 --- a/eden/mononoke/bookmarks/warm_bookmarks_cache/lib.rs +++ b/eden/mononoke/bookmarks/warm_bookmarks_cache/lib.rs @@ -7,7 +7,7 @@ #![deny(warnings)] -use std::collections::{BTreeSet, HashMap, VecDeque}; +use std::collections::{HashMap, HashSet, VecDeque}; use std::sync::{Arc, RwLock}; use std::time::Duration; @@ -92,13 +92,13 @@ impl<'a> WarmBookmarksCacheBuilder<'a> { } pub fn add_all_derived_data_warmers(&mut self) -> Result<(), Error> { - self.add_derived_data_warmers(&self.repo.get_derived_data_config().derived_data_types) + self.add_derived_data_warmers(&self.repo.get_derived_data_config().enabled.types) } - pub fn add_derived_data_warmers(&mut self, types: &BTreeSet) -> Result<(), Error> { - let derived_data_types = &self.repo.get_derived_data_config().derived_data_types; + pub fn add_derived_data_warmers(&mut self, types: &HashSet) -> Result<(), Error> { + let config = &self.repo.get_derived_data_config(); for ty in types { - if !derived_data_types.contains(ty) { + if !config.is_enabled(ty) { return Err(anyhow!("{} is not enabled for {}", ty, self.repo.name())); } } diff --git a/eden/mononoke/cmds/backfill_derived_data/main.rs b/eden/mononoke/cmds/backfill_derived_data/main.rs index e4f0d7c23c..3d2c54cbb4 100644 --- a/eden/mononoke/cmds/backfill_derived_data/main.rs +++ b/eden/mononoke/cmds/backfill_derived_data/main.rs @@ -26,7 +26,8 @@ use cmdlib::{ use context::CoreContext; use derived_data::BonsaiDerivable; use derived_data_utils::{ - derived_data_utils, derived_data_utils_unsafe, DerivedUtils, ThinOut, POSSIBLE_DERIVED_TYPES, + derived_data_utils, derived_data_utils_for_backfill, DerivedUtils, ThinOut, + POSSIBLE_DERIVED_TYPES, }; use fbinit::FacebookInit; use fsnodes::RootFsnodeId; @@ -36,12 +37,11 @@ use futures::{ stream::{self, StreamExt, TryStreamExt}, }; use futures_stats::TimedFutureExt; -use metaconfig_types::DerivedDataConfig; use mononoke_types::{ChangesetId, DateTime}; use slog::{info, Logger}; use stats::prelude::*; use std::{ - collections::{BTreeSet, HashMap}, + collections::{BTreeSet, HashMap, HashSet}, fs, path::Path, sync::Arc, @@ -268,7 +268,10 @@ async fn run_subcmd<'a>( (SUBCOMMAND_BACKFILL_ALL, Some(sub_m)) => { let repo = args::open_repo_unredacted(fb, logger, matches).await?; let derived_data_types = sub_m.values_of(ARG_DERIVED_DATA_TYPE).map_or_else( - || repo.get_derived_data_config().derived_data_types.clone(), + || { + let config = repo.get_derived_data_config(); + &config.enabled.types | &config.backfilling.types + }, |names| names.map(ToString::to_string).collect(), ); subcommand_backfill_all(&ctx, &repo, derived_data_types).await @@ -299,19 +302,9 @@ async fn run_subcmd<'a>( .map(|limit| limit.parse::()) .transpose()?; - let repo = + let mut repo = open_repo_maybe_unredacted(fb, &logger, &matches, &derived_data_type).await?; - // Backfill is used when when a derived data type is not enabled yet, and so - // any attempt to call BonsaiDerived::derive() fails. However calling - // BonsaiDerived::derive() might be useful, and so the lines below explicitly - // enable `derived_data_type` to allow calling BonsaiDerived::derive() if necessary. - let mut repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| { - derived_data_config - .derived_data_types - .insert(derived_data_type.clone()); - derived_data_config - }); info!( ctx.logger(), "reading all changesets for: {:?}", @@ -359,7 +352,7 @@ async fn run_subcmd<'a>( subcommand_backfill( &ctx, &repo, - &derived_data_type, + derived_data_type.as_str(), regenerate, changesets, cleaner, @@ -406,9 +399,10 @@ async fn run_subcmd<'a>( let repo = args::open_repo_unredacted(fb, logger, matches).await?; let types = repo .get_derived_data_config() - .derived_data_types - .clone() - .into_iter() + .enabled + .types + .iter() + .cloned() .collect(); (repo, types) } @@ -450,12 +444,12 @@ fn parse_serialized_commits>(file: P) -> Result, + derived_data_types: HashSet, ) -> Result<()> { info!(ctx.logger(), "derived data types: {:?}", derived_data_types); let derivers = derived_data_types .iter() - .map(|name| derived_data_utils_unsafe(repo, name.as_str())) + .map(|name| derived_data_utils_for_backfill(repo, name.as_str())) .collect::, _>>()?; tail_batch_iteration(ctx, repo, derivers.as_ref()).await } @@ -467,12 +461,12 @@ fn truncate_duration(duration: Duration) -> Duration { async fn subcommand_backfill( ctx: &CoreContext, repo: &BlobRepo, - derived_data_type: &String, + derived_data_type: &str, regenerate: bool, changesets: Vec, mut cleaner: Option, ) -> Result<()> { - let derived_utils = &derived_data_utils_unsafe(repo, derived_data_type.as_str())?; + let derived_utils = &derived_data_utils_for_backfill(repo, derived_data_type)?; info!( ctx.logger(), @@ -571,9 +565,9 @@ async fn subcommand_tail( let derive_utils: Vec> = unredacted_repo .get_derived_data_config() - .derived_data_types - .clone() - .into_iter() + .enabled + .types + .iter() .map(|name| derived_data_utils(&unredacted_repo, name)) .collect::>()?; slog::info!( diff --git a/eden/mononoke/derived_data/blame/derived.rs b/eden/mononoke/derived_data/blame/derived.rs index d1f2db6edb..e8df1ce6ea 100644 --- a/eden/mononoke/derived_data/blame/derived.rs +++ b/eden/mononoke/derived_data/blame/derived.rs @@ -14,6 +14,7 @@ use cloned::cloned; use context::CoreContext; use derived_data::{ impl_bonsai_derived_mapping, BlobstoreExistsMapping, BonsaiDerivable, BonsaiDerived, + DerivedDataTypesConfig, }; use filestore::{self, FetchKey}; use futures::{future, StreamExt, TryFutureExt, TryStreamExt}; @@ -119,13 +120,9 @@ pub struct BlameRootMapping { impl BlobstoreExistsMapping for BlameRootMapping { type Value = BlameRoot; - fn new(repo: &BlobRepo) -> Result { - let options = BlameDeriveOptions { - filesize_limit: repo - .get_derived_data_config() - .override_blame_filesize_limit - .unwrap_or(BLAME_FILESIZE_LIMIT), - }; + fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result { + let filesize_limit = config.blame_filesize_limit.unwrap_or(BLAME_FILESIZE_LIMIT); + let options = BlameDeriveOptions { filesize_limit }; Ok(Self { blobstore: repo.get_blobstore().boxed(), options, diff --git a/eden/mononoke/derived_data/blame/lib.rs b/eden/mononoke/derived_data/blame/lib.rs index 24d8451e76..18fb40e25d 100644 --- a/eden/mononoke/derived_data/blame/lib.rs +++ b/eden/mononoke/derived_data/blame/lib.rs @@ -9,7 +9,7 @@ #![type_length_limit = "1441792"] mod derived; -pub use derived::{fetch_file_full_content, BlameRoot, BlameRootMapping}; +pub use derived::{fetch_file_full_content, BlameRoot, BlameRootMapping, BLAME_FILESIZE_LIMIT}; #[cfg(test)] mod tests; diff --git a/eden/mononoke/derived_data/blame/tests.rs b/eden/mononoke/derived_data/blame/tests.rs index fc639135c7..d14356b4c5 100644 --- a/eden/mononoke/derived_data/blame/tests.rs +++ b/eden/mononoke/derived_data/blame/tests.rs @@ -247,7 +247,7 @@ async fn test_blame_file_size_limit_rejected(fb: FacebookInit) -> Result<(), Err fetch_blame(ctx, repo, c1, MPath::new(file1)?).await?; let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| { - derived_data_config.override_blame_filesize_limit = Some(4); + derived_data_config.enabled.blame_filesize_limit = Some(4); derived_data_config }); diff --git a/eden/mononoke/derived_data/changeset_info/derive.rs b/eden/mononoke/derived_data/changeset_info/derive.rs index 2da2e9f23e..1625e08021 100644 --- a/eden/mononoke/derived_data/changeset_info/derive.rs +++ b/eden/mononoke/derived_data/changeset_info/derive.rs @@ -12,7 +12,9 @@ use async_trait::async_trait; use blobrepo::BlobRepo; use blobstore::Blobstore; use context::CoreContext; -use derived_data::{impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable}; +use derived_data::{ + impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, DerivedDataTypesConfig, +}; use mononoke_types::BonsaiChangeset; use crate::ChangesetInfo; @@ -44,7 +46,7 @@ pub struct ChangesetInfoMapping { impl BlobstoreRootIdMapping for ChangesetInfoMapping { type Value = ChangesetInfo; - fn new(repo: &BlobRepo) -> Result { + fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result { Ok(Self { blobstore: repo.get_blobstore().boxed(), }) diff --git a/eden/mononoke/derived_data/deleted_files_manifest/mapping.rs b/eden/mononoke/derived_data/deleted_files_manifest/mapping.rs index a5825cc169..cfa15f7b6e 100644 --- a/eden/mononoke/derived_data/deleted_files_manifest/mapping.rs +++ b/eden/mononoke/derived_data/deleted_files_manifest/mapping.rs @@ -12,7 +12,9 @@ use blobrepo::BlobRepo; use blobstore::{Blobstore, BlobstoreGetData}; use bytes::Bytes; use context::CoreContext; -use derived_data::{impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable}; +use derived_data::{ + impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, DerivedDataTypesConfig, +}; use mononoke_types::{BlobstoreBytes, BonsaiChangeset, DeletedManifestId}; use repo_blobstore::RepoBlobstore; use std::convert::{TryFrom, TryInto}; @@ -85,7 +87,7 @@ pub struct RootDeletedManifestMapping { impl BlobstoreRootIdMapping for RootDeletedManifestMapping { type Value = RootDeletedManifestId; - fn new(repo: &BlobRepo) -> Result { + fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result { Ok(Self { blobstore: repo.get_blobstore(), }) diff --git a/eden/mononoke/derived_data/deleted_files_manifest/ops.rs b/eden/mononoke/derived_data/deleted_files_manifest/ops.rs index de3a3dfebb..ffdd4d2bec 100644 --- a/eden/mononoke/derived_data/deleted_files_manifest/ops.rs +++ b/eden/mononoke/derived_data/deleted_files_manifest/ops.rs @@ -64,8 +64,7 @@ pub async fn resolve_path_state( let use_deleted_manifest = repo .get_derived_data_config() - .derived_data_types - .contains(RootDeletedManifestId::NAME); + .is_enabled(RootDeletedManifestId::NAME); if !use_deleted_manifest { return Ok(None); } diff --git a/eden/mononoke/derived_data/fastlog/mapping.rs b/eden/mononoke/derived_data/fastlog/mapping.rs index dc4880f246..ad653b69e0 100644 --- a/eden/mononoke/derived_data/fastlog/mapping.rs +++ b/eden/mononoke/derived_data/fastlog/mapping.rs @@ -13,6 +13,7 @@ use cloned::cloned; use context::CoreContext; use derived_data::{ impl_bonsai_derived_mapping, BlobstoreExistsMapping, BonsaiDerivable, BonsaiDerived, + DerivedDataTypesConfig, }; use futures::future; use futures::stream::TryStreamExt; @@ -144,7 +145,7 @@ pub struct RootFastlogMapping { impl BlobstoreExistsMapping for RootFastlogMapping { type Value = RootFastlog; - fn new(repo: &BlobRepo) -> Result { + fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result { Ok(Self { blobstore: repo.get_blobstore().boxed(), }) diff --git a/eden/mononoke/derived_data/filenodes/lib.rs b/eden/mononoke/derived_data/filenodes/lib.rs index f2ac714ca7..794a6dd236 100644 --- a/eden/mononoke/derived_data/filenodes/lib.rs +++ b/eden/mononoke/derived_data/filenodes/lib.rs @@ -14,7 +14,9 @@ use blobrepo_hg::BlobRepoHg; use blobstore::Loadable; use borrowed::borrowed; use context::CoreContext; -use derived_data::{BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError}; +use derived_data::{ + BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig, +}; use filenodes::{FilenodeInfo, FilenodeResult, PreparedFilenode}; use futures::{ compat::Future01CompatExt, future::try_join_all, pin_mut, stream, FutureExt, StreamExt, @@ -307,7 +309,7 @@ pub struct FilenodesOnlyPublicMapping { } impl FilenodesOnlyPublicMapping { - pub fn new(repo: &BlobRepo) -> Result { + pub fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result { Ok(Self { repo: repo.clone() }) } } @@ -320,7 +322,8 @@ impl BonsaiDerived for FilenodesOnlyPublic { _ctx: &CoreContext, repo: &BlobRepo, ) -> Result { - FilenodesOnlyPublicMapping::new(repo) + let config = derived_data::enabled_type_config(repo, Self::NAME)?; + FilenodesOnlyPublicMapping::new(repo, config) } } diff --git a/eden/mononoke/derived_data/fsnodes/mapping.rs b/eden/mononoke/derived_data/fsnodes/mapping.rs index 6da39c71b8..36e4919679 100644 --- a/eden/mononoke/derived_data/fsnodes/mapping.rs +++ b/eden/mononoke/derived_data/fsnodes/mapping.rs @@ -16,7 +16,7 @@ use bytes::Bytes; use context::CoreContext; use derived_data::{ impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, BonsaiDerivedMapping, - DeriveMode, + DerivedDataTypesConfig, }; use futures::stream::{self, StreamExt, TryStreamExt}; use mononoke_types::{ @@ -92,13 +92,10 @@ impl BonsaiDerivable for RootFsnodeId { repo: &BlobRepo, csids: Vec, mapping: &BatchMapping, - mode: DeriveMode, ) -> Result, Error> where BatchMapping: BonsaiDerivedMapping + Send + Sync + Clone + 'static, { - mode.check_if_derive_allowed::(repo)?; - let derived = derive_fsnode_in_batch(ctx, repo, csids.clone()).await?; stream::iter(derived.into_iter().map(|(cs_id, derived)| async move { @@ -123,7 +120,7 @@ pub struct RootFsnodeMapping { impl BlobstoreRootIdMapping for RootFsnodeMapping { type Value = RootFsnodeId; - fn new(repo: &BlobRepo) -> Result { + fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result { Ok(Self { blobstore: repo.get_blobstore(), }) diff --git a/eden/mononoke/derived_data/mercurial_derived_data/mapping.rs b/eden/mononoke/derived_data/mercurial_derived_data/mapping.rs index 1bcae567dc..8c6742b60d 100644 --- a/eden/mononoke/derived_data/mercurial_derived_data/mapping.rs +++ b/eden/mononoke/derived_data/mercurial_derived_data/mapping.rs @@ -16,7 +16,9 @@ use mononoke_types::{BonsaiChangeset, ChangesetId, RepositoryId}; use std::{collections::HashMap, sync::Arc}; -use derived_data::{BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError}; +use derived_data::{ + BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig, +}; #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct MappedHgChangesetId(pub HgChangesetId); @@ -45,11 +47,11 @@ pub struct HgChangesetIdMapping { } impl HgChangesetIdMapping { - pub fn new(repo: &BlobRepo) -> Self { - Self { + pub fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result { + Ok(Self { repo_id: repo.get_repoid(), mapping: repo.attribute_expected::().clone(), - } + }) } } @@ -99,6 +101,7 @@ impl BonsaiDerived for MappedHgChangesetId { _ctx: &CoreContext, repo: &BlobRepo, ) -> Result { - Ok(HgChangesetIdMapping::new(repo)) + let config = derived_data::enabled_type_config(repo, Self::NAME)?; + Ok(HgChangesetIdMapping::new(repo, config)?) } } diff --git a/eden/mononoke/derived_data/skeleton_manifest/mapping.rs b/eden/mononoke/derived_data/skeleton_manifest/mapping.rs index 843db6f8dc..3ba5c0c828 100644 --- a/eden/mononoke/derived_data/skeleton_manifest/mapping.rs +++ b/eden/mononoke/derived_data/skeleton_manifest/mapping.rs @@ -16,7 +16,7 @@ use bytes::Bytes; use context::CoreContext; use derived_data::{ impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, BonsaiDerivedMapping, - DeriveMode, + DerivedDataTypesConfig, }; use futures::stream::{self, StreamExt, TryStreamExt}; use mononoke_types::{ @@ -94,13 +94,10 @@ impl BonsaiDerivable for RootSkeletonManifestId { repo: &BlobRepo, csids: Vec, mapping: &BatchMapping, - mode: DeriveMode, ) -> Result, Error> where BatchMapping: BonsaiDerivedMapping + Send + Sync + Clone + 'static, { - mode.check_if_derive_allowed::(repo)?; - let derived = derive_skeleton_manifests_in_batch(ctx, repo, csids.clone()).await?; stream::iter(derived.into_iter().map(|(cs_id, derived)| async move { @@ -125,7 +122,7 @@ pub struct RootSkeletonManifestMapping { impl BlobstoreRootIdMapping for RootSkeletonManifestMapping { type Value = RootSkeletonManifestId; - fn new(repo: &BlobRepo) -> Result { + fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result { Ok(Self { blobstore: repo.get_blobstore(), }) diff --git a/eden/mononoke/derived_data/src/derive_impl.rs b/eden/mononoke/derived_data/src/derive_impl.rs index 66ed0ffdcd..828004db27 100644 --- a/eden/mononoke/derived_data/src/derive_impl.rs +++ b/eden/mononoke/derived_data/src/derive_impl.rs @@ -18,7 +18,7 @@ use futures::{ TryStreamExt, }; use futures_stats::{futures03::TimedFutureExt, FutureStats}; -use metaconfig_types::DerivedDataConfig; +use metaconfig_types::{DerivedDataConfig, DerivedDataTypesConfig}; use mononoke_types::ChangesetId; use scuba_ext::MononokeScubaSampleBuilder; use slog::debug; @@ -44,40 +44,23 @@ define_stats! { const LEASE_WARNING_THRESHOLD: Duration = Duration::from_secs(60); -#[derive(Copy, Clone, PartialEq, Eq)] -pub enum DeriveMode { - /// This mode should almost always be preferred - OnlyIfEnabled, - /// This mode should rarely be used, perhaps only for backfilling type of derived data - /// which is not enabled in this repo yet - Unsafe, -} - -impl DeriveMode { - /// Check that derivation is suitable for a particular derived data type - /// in this mode. - /// - /// Returns `DeriveError` if derivation should not proceed. - pub fn check_if_derive_allowed( - self, - repo: &BlobRepo, - ) -> Result<(), DeriveError> { - if self == DeriveMode::OnlyIfEnabled { - if !repo - .get_derived_data_config() - .derived_data_types - .contains(Derivable::NAME) - { - STATS::derived_data_disabled - .add_value(1, (repo.get_repoid().id(), Derivable::NAME)); - return Err(DeriveError::Disabled( - Derivable::NAME, - repo.get_repoid(), - repo.name().clone(), - )); - } - } - Ok(()) +/// Checks that the named derived data type is enabled, and returns the +/// enabled derived data types config if it is. Returns an error if the +/// derived data type is not enabled. +pub fn enabled_type_config<'repo>( + repo: &'repo BlobRepo, + name: &'static str, +) -> Result<&'repo DerivedDataTypesConfig, DeriveError> { + let config = repo.get_derived_data_config(); + if config.enabled.types.contains(name) { + Ok(&config.enabled) + } else { + STATS::derived_data_disabled.add_value(1, (repo.get_repoid().id(), name)); + Err(DeriveError::Disabled( + name, + repo.get_repoid(), + repo.name().clone(), + )) } } @@ -114,12 +97,10 @@ pub async fn derive_impl< repo: &BlobRepo, derived_mapping: &Mapping, start_csid: ChangesetId, - mode: DeriveMode, ) -> Result { let derivation = async { let all_csids = - find_topo_sorted_underived(ctx, repo, derived_mapping, Some(start_csid), None, mode) - .await?; + find_topo_sorted_underived(ctx, repo, derived_mapping, Some(start_csid), None).await?; for csid in &all_csids { ctx.scuba().clone().log_with_msg( @@ -200,10 +181,7 @@ pub(crate) async fn find_topo_sorted_underived< derived_mapping: &Mapping, start_csids: Changesets, limit: Option, - mode: DeriveMode, ) -> Result, Error> { - mode.check_if_derive_allowed::(repo)?; - let changeset_fetcher = repo.get_changeset_fetcher(); // This is necessary to avoid visiting the same commit a lot of times in mergy repos let visited: Arc>> = Arc::new(Mutex::new(HashSet::new())); @@ -703,7 +681,8 @@ mod test { async fn derive_for_master(ctx: CoreContext, repo: BlobRepo) { let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| { derived_data_config - .derived_data_types + .enabled + .types .insert(TestGenNum::NAME.to_string()); derived_data_config }); @@ -748,7 +727,8 @@ mod test { .await .dangerous_override(|mut derived_data_config: DerivedDataConfig| { derived_data_config - .derived_data_types + .enabled + .types .insert(TestGenNum::NAME.to_string()); derived_data_config }) @@ -860,7 +840,8 @@ mod test { let repo = linear::getrepo(fb).await; let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| { derived_data_config - .derived_data_types + .enabled + .types .insert(TestGenNum::NAME.to_string()); derived_data_config }); @@ -874,8 +855,7 @@ mod test { // Reverse them to derive parents before children let cs_ids = cs_ids.clone().into_iter().rev().collect::>(); let mapping = TestGenNum::default_mapping(&ctx, &repo)?; - let derived_batch = - TestGenNum::batch_derive(&ctx, &repo, cs_ids, &mapping, DeriveMode::Unsafe).await?; + let derived_batch = TestGenNum::batch_derive(&ctx, &repo, cs_ids, &mapping).await?; derived_batch .get(&master_cs_id) .unwrap_or_else(|| panic!("{} has not been derived", master_cs_id)) @@ -886,7 +866,8 @@ mod test { let repo = linear::getrepo(fb).await; let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| { derived_data_config - .derived_data_types + .enabled + .types .insert(TestGenNum::NAME.to_string()); derived_data_config }); diff --git a/eden/mononoke/derived_data/src/lib.rs b/eden/mononoke/derived_data/src/lib.rs index 39e218b76f..81b20eb3c2 100644 --- a/eden/mononoke/derived_data/src/lib.rs +++ b/eden/mononoke/derived_data/src/lib.rs @@ -24,9 +24,9 @@ pub mod batch; pub mod derive_impl; pub mod mapping_impl; +pub use derive_impl::enabled_type_config; pub use mapping_impl::{BlobstoreExistsMapping, BlobstoreRootIdMapping}; - -pub use crate::derive_impl::DeriveMode; +pub use metaconfig_types::DerivedDataTypesConfig; #[derive(Debug, Error)] pub enum DeriveError { @@ -72,7 +72,6 @@ pub trait BonsaiDerivable: Sized + 'static + Send + Sync + Clone { repo: &BlobRepo, csids: Vec, mapping: &Mapping, - mode: DeriveMode, ) -> Result, Error> where Mapping: BonsaiDerivedMapping + Send + Sync + Clone + 'static, @@ -83,7 +82,7 @@ pub trait BonsaiDerivable: Sized + 'static + Send + Sync + Clone { // cause O(n^2) derivations. for csid in csids { let derived = - derive_impl::derive_impl::(ctx, repo, mapping, csid, mode).await?; + derive_impl::derive_impl::(ctx, repo, mapping, csid).await?; res.insert(csid, derived); } Ok(res) @@ -120,14 +119,7 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone + BonsaiDerivable csid: ChangesetId, ) -> Result { let mapping = Self::default_mapping(&ctx, &repo)?; - derive_impl::derive_impl::( - ctx, - repo, - &mapping, - csid, - DeriveMode::OnlyIfEnabled, - ) - .await + derive_impl::derive_impl::(ctx, repo, &mapping, csid).await } /// Fetch the derived data in cases where we might not want to trigger derivation, e.g. when scrubbing. @@ -156,7 +148,6 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone + BonsaiDerivable &mapping, Some(*csid), Some(limit), - DeriveMode::OnlyIfEnabled, ) .await?; Ok(underived.len() as u64) @@ -173,12 +164,7 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone + BonsaiDerivable ) -> Result, DeriveError> { let mapping = Self::default_mapping(&ctx, &repo)?; let underived = derive_impl::find_topo_sorted_underived::( - ctx, - repo, - &mapping, - csids, - None, - DeriveMode::OnlyIfEnabled, + ctx, repo, &mapping, csids, None, ) .await?; Ok(underived) diff --git a/eden/mononoke/derived_data/src/mapping_impl.rs b/eden/mononoke/derived_data/src/mapping_impl.rs index 8018c6155f..7319cf9aad 100644 --- a/eden/mononoke/derived_data/src/mapping_impl.rs +++ b/eden/mononoke/derived_data/src/mapping_impl.rs @@ -14,6 +14,7 @@ use blobrepo::BlobRepo; use blobstore::{Blobstore, BlobstoreBytes, BlobstoreGetData}; use context::CoreContext; use futures::stream::{FuturesUnordered, TryStreamExt}; +use metaconfig_types::DerivedDataTypesConfig; use mononoke_types::ChangesetId; use crate::{BonsaiDerivable, BonsaiDerived}; @@ -31,7 +32,7 @@ pub trait BlobstoreRootIdMapping { + Sized; /// Create a new instance of this mapping. - fn new(repo: &BlobRepo) -> Result + fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result where Self: Sized; @@ -92,7 +93,7 @@ pub trait BlobstoreExistsMapping { type Value: BonsaiDerived + From + Send + Sync + Clone; /// Create a new instance of this mapping. - fn new(repo: &BlobRepo) -> Result + fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result where Self: Sized; @@ -212,7 +213,8 @@ macro_rules! impl_bonsai_derived_mapping { _ctx: &::context::CoreContext, repo: &::blobrepo::BlobRepo, ) -> ::std::result::Result { - ::std::result::Result::Ok(<$mapping as $mapping_impl>::new(repo)?) + let config = $crate::derive_impl::enabled_type_config(repo, Self::NAME)?; + ::std::result::Result::Ok(<$mapping as $mapping_impl>::new(repo, config)?) } } }; diff --git a/eden/mononoke/derived_data/unodes/derive.rs b/eden/mononoke/derived_data/unodes/derive.rs index bbdecc3daf..0db53e85d4 100644 --- a/eden/mononoke/derived_data/unodes/derive.rs +++ b/eden/mononoke/derived_data/unodes/derive.rs @@ -644,7 +644,7 @@ mod tests { // Unodes v1 should create a new one that points to the parent unode let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| { - derived_data_config.unode_version = UnodeVersion::V1; + derived_data_config.enabled.unode_version = UnodeVersion::V1; derived_data_config }); let (p1_unodes, merge_unodes) = find_unodes(ctx.clone(), repo.clone()).await?; diff --git a/eden/mononoke/derived_data/unodes/lib.rs b/eden/mononoke/derived_data/unodes/lib.rs index 56e4e3cd76..01ebe45c9d 100644 --- a/eden/mononoke/derived_data/unodes/lib.rs +++ b/eden/mononoke/derived_data/unodes/lib.rs @@ -7,7 +7,7 @@ #![deny(warnings)] -use anyhow::Error; +use anyhow::{Error, Result}; use blobrepo::BlobRepo; use cloned::cloned; use context::CoreContext; diff --git a/eden/mononoke/derived_data/unodes/mapping.rs b/eden/mononoke/derived_data/unodes/mapping.rs index c0c944324d..14fac5c884 100644 --- a/eden/mononoke/derived_data/unodes/mapping.rs +++ b/eden/mononoke/derived_data/unodes/mapping.rs @@ -12,7 +12,9 @@ use blobrepo::BlobRepo; use blobstore::{Blobstore, BlobstoreGetData}; use bytes::Bytes; use context::CoreContext; -use derived_data::{impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable}; +use derived_data::{ + impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, DerivedDataTypesConfig, +}; use futures::TryFutureExt; use metaconfig_types::UnodeVersion; use mononoke_types::{ @@ -93,11 +95,10 @@ pub struct RootUnodeManifestMapping { impl BlobstoreRootIdMapping for RootUnodeManifestMapping { type Value = RootUnodeManifestId; - fn new(repo: &BlobRepo) -> Result { - let unode_version = repo.get_derived_data_config().unode_version; + fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result { Ok(Self { blobstore: repo.get_blobstore(), - unode_version, + unode_version: config.unode_version, }) } diff --git a/eden/mononoke/derived_data/utils/lib.rs b/eden/mononoke/derived_data/utils/lib.rs index b8a5555edf..9c9789efe4 100644 --- a/eden/mononoke/derived_data/utils/lib.rs +++ b/eden/mononoke/derived_data/utils/lib.rs @@ -20,7 +20,7 @@ use context::CoreContext; use deleted_files_manifest::{RootDeletedManifestId, RootDeletedManifestMapping}; use derived_data::{ derive_impl::derive_impl, BlobstoreExistsMapping, BlobstoreRootIdMapping, BonsaiDerivable, - BonsaiDerived, BonsaiDerivedMapping, DeriveError, DeriveMode, RegenerateMapping, + BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig, RegenerateMapping, }; use derived_data_filenodes::{FilenodesOnlyPublic, FilenodesOnlyPublicMapping}; use fastlog::{RootFastlog, RootFastlogMapping}; @@ -170,13 +170,12 @@ pub trait DerivedUtils: Send + Sync + 'static { #[derive(Clone)] struct DerivedUtilsFromMapping { mapping: RegenerateMapping, - mode: DeriveMode, } impl DerivedUtilsFromMapping { - fn new(mapping: M, mode: DeriveMode) -> Self { + fn new(mapping: M) -> Self { let mapping = RegenerateMapping::new(mapping); - Self { mapping, mode } + Self { mapping } } } @@ -196,9 +195,9 @@ where // `self.mapping` there. This will allow us to // e.g. regenerate derived data for the commit // even if it was already generated (see RegenerateMapping call). - cloned!(self.mapping, self.mode); + cloned!(self.mapping); async move { - let result = derive_impl::(&ctx, &repo, &mapping, csid, mode).await?; + let result = derive_impl::(&ctx, &repo, &mapping, csid).await?; Ok(format!("{:?}", result)) } .boxed() @@ -242,14 +241,7 @@ where for csid in csids { // create new context so each derivation batch would have its own trace let ctx = CoreContext::new_with_logger(ctx.fb, ctx.logger().clone()); - derive_impl::( - &ctx, - &repo, - &in_memory_mapping, - csid, - DeriveMode::Unsafe, - ) - .await?; + derive_impl::(&ctx, &repo, &in_memory_mapping, csid).await?; } // flush blobstore @@ -403,58 +395,76 @@ pub fn derived_data_utils( repo: &BlobRepo, name: impl AsRef, ) -> Result, Error> { - derived_data_utils_impl(repo, name, DeriveMode::OnlyIfEnabled) + let name = name.as_ref(); + let config = repo.get_derived_data_config(); + let types_config = if config.enabled.types.contains(name) { + &config.enabled + } else { + return Err(anyhow!("Derived data type {} is not configured", name)); + }; + derived_data_utils_impl(repo, name, types_config) } -pub fn derived_data_utils_unsafe( +pub fn derived_data_utils_for_backfill( repo: &BlobRepo, name: impl AsRef, ) -> Result, Error> { - derived_data_utils_impl(repo, name, DeriveMode::Unsafe) + let name = name.as_ref(); + let config = repo.get_derived_data_config(); + let types_config = if config.backfilling.types.contains(name) { + &config.backfilling + } else if config.enabled.types.contains(name) { + &config.enabled + } else { + return Err(anyhow!( + "Derived data type {} is not configured for backfilling", + name + )); + }; + derived_data_utils_impl(repo, name, types_config) } fn derived_data_utils_impl( repo: &BlobRepo, - name: impl AsRef, - mode: DeriveMode, + name: &str, + config: &DerivedDataTypesConfig, ) -> Result, Error> { - let repo = &repo; - match name.as_ref() { + match name { RootUnodeManifestId::NAME => { - let mapping = RootUnodeManifestMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = RootUnodeManifestMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } RootFastlog::NAME => { - let mapping = RootFastlogMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = RootFastlogMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } MappedHgChangesetId::NAME => { - let mapping = HgChangesetIdMapping::new(repo); - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = HgChangesetIdMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } RootFsnodeId::NAME => { - let mapping = RootFsnodeMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = RootFsnodeMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } BlameRoot::NAME => { - let mapping = BlameRootMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = BlameRootMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } ChangesetInfo::NAME => { - let mapping = ChangesetInfoMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = ChangesetInfoMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } RootDeletedManifestId::NAME => { - let mapping = RootDeletedManifestMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = RootDeletedManifestMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } FilenodesOnlyPublic::NAME => { - let mapping = FilenodesOnlyPublicMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = FilenodesOnlyPublicMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } RootSkeletonManifestId::NAME => { - let mapping = RootSkeletonManifestMapping::new(repo)?; - Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode))) + let mapping = RootSkeletonManifestMapping::new(repo, config)?; + Ok(Arc::new(DerivedUtilsFromMapping::new(mapping))) } name => Err(format_err!("Unsupported derived data type: {}", name)), } diff --git a/eden/mononoke/git/git_types/src/derive_tree.rs b/eden/mononoke/git/git_types/src/derive_tree.rs index 190651bc2c..fe91a58c0f 100644 --- a/eden/mononoke/git/git_types/src/derive_tree.rs +++ b/eden/mononoke/git/git_types/src/derive_tree.rs @@ -20,7 +20,9 @@ use std::sync::Arc; use blobrepo::BlobRepo; use blobstore::{Blobstore, Storable}; -use derived_data::{BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError}; +use derived_data::{ + BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig, +}; use filestore::{self, FetchKey}; use mononoke_types::{BonsaiChangeset, ChangesetId, MPath}; @@ -33,7 +35,7 @@ pub struct TreeMapping { } impl TreeMapping { - pub fn new(blobstore: Arc) -> Self { + pub fn new(blobstore: Arc, _config: &DerivedDataTypesConfig) -> Self { Self { blobstore } } @@ -113,7 +115,8 @@ impl BonsaiDerived for TreeHandle { _ctx: &CoreContext, repo: &BlobRepo, ) -> Result { - Ok(TreeMapping::new(repo.blobstore().boxed())) + let config = derived_data::enabled_type_config(repo, Self::NAME)?; + Ok(TreeMapping::new(repo.blobstore().boxed(), config)) } } diff --git a/eden/mononoke/metaconfig/parser/src/config.rs b/eden/mononoke/metaconfig/parser/src/config.rs index 6d21769e46..a7bc561ea2 100644 --- a/eden/mononoke/metaconfig/parser/src/config.rs +++ b/eden/mononoke/metaconfig/parser/src/config.rs @@ -431,18 +431,18 @@ mod test { use ascii::AsciiString; use bookmarks_types::BookmarkName; use cached_config::TestSource; - use maplit::{btreemap, btreeset, hashmap}; + use maplit::{btreemap, hashmap, hashset}; use metaconfig_types::{ BlobConfig, BlobstoreId, BookmarkParams, Bundle2ReplayParams, CacheWarmupParams, CommitSyncConfigVersion, CommitSyncDirection, DatabaseConfig, - DefaultSmallToLargeCommitSyncPathAction, DerivedDataConfig, FilestoreParams, HookBypass, - HookConfig, HookManagerParams, HookParams, InfinitepushNamespace, InfinitepushParams, - LfsParams, LocalDatabaseConfig, MetadataDatabaseConfig, MultiplexId, MultiplexedStoreType, - PushParams, PushrebaseFlags, PushrebaseParams, RemoteDatabaseConfig, - RemoteMetadataDatabaseConfig, RepoClientKnobs, SegmentedChangelogConfig, - ShardableRemoteDatabaseConfig, ShardedRemoteDatabaseConfig, SmallRepoCommitSyncConfig, - SourceControlServiceMonitoring, SourceControlServiceParams, UnodeVersion, - WireprotoLoggingConfig, + DefaultSmallToLargeCommitSyncPathAction, DerivedDataConfig, DerivedDataTypesConfig, + FilestoreParams, HookBypass, HookConfig, HookManagerParams, HookParams, + InfinitepushNamespace, InfinitepushParams, LfsParams, LocalDatabaseConfig, + MetadataDatabaseConfig, MultiplexId, MultiplexedStoreType, PushParams, PushrebaseFlags, + PushrebaseParams, RemoteDatabaseConfig, RemoteMetadataDatabaseConfig, RepoClientKnobs, + SegmentedChangelogConfig, ShardableRemoteDatabaseConfig, ShardedRemoteDatabaseConfig, + SmallRepoCommitSyncConfig, SourceControlServiceMonitoring, SourceControlServiceParams, + UnodeVersion, WireprotoLoggingConfig, }; use mononoke_types::MPath; use nonzero_ext::nonzero; @@ -719,12 +719,10 @@ mod test { all_hooks_bypassed=false bypassed_commits_scuba_table="commits_bypassed_hooks" - [derived_data_config] - derived_data_types=["fsnodes"] - override_blame_filesize_limit=101 - - [derived_data_config.raw_unode_version] - unode_version_v2 = {} + [derived_data_config.enabled] + types = ["fsnodes", "unodes", "blame"] + unode_version = 2 + blame_filesize_limit = 101 [storage.main.metadata.remote] primary = { db_address = "db_address" } @@ -1025,10 +1023,17 @@ mod test { ], }), derived_data_config: DerivedDataConfig { - derived_data_types: btreeset![String::from("fsnodes")], + enabled: DerivedDataTypesConfig { + types: hashset! { + String::from("fsnodes"), + String::from("unodes"), + String::from("blame"), + }, + unode_version: UnodeVersion::V2, + blame_filesize_limit: Some(101), + }, + backfilling: DerivedDataTypesConfig::default(), scuba_table: None, - unode_version: UnodeVersion::V2, - override_blame_filesize_limit: Some(101), }, hgsql_name: HgsqlName("fbsource".to_string()), hgsql_globalrevs_name: HgsqlGlobalrevsName("fbsource".to_string()), diff --git a/eden/mononoke/metaconfig/parser/src/convert/mod.rs b/eden/mononoke/metaconfig/parser/src/convert/mod.rs index 723f3410ae..600273f769 100644 --- a/eden/mononoke/metaconfig/parser/src/convert/mod.rs +++ b/eden/mononoke/metaconfig/parser/src/convert/mod.rs @@ -16,7 +16,7 @@ pub trait Convert { /// Conversion target type Output; - /// Try to convert `self` into `Self::Outpuf` + /// Try to convert `self` into `Self::Output` fn convert(self) -> Result; } diff --git a/eden/mononoke/metaconfig/parser/src/convert/repo.rs b/eden/mononoke/metaconfig/parser/src/convert/repo.rs index 0b9565454a..a1358dff78 100644 --- a/eden/mononoke/metaconfig/parser/src/convert/repo.rs +++ b/eden/mononoke/metaconfig/parser/src/convert/repo.rs @@ -12,20 +12,20 @@ use anyhow::{anyhow, Context, Result}; use bookmarks_types::BookmarkName; use metaconfig_types::{ BookmarkOrRegex, BookmarkParams, Bundle2ReplayParams, CacheWarmupParams, - CommitcloudBookmarksFillerMode, ComparableRegex, DerivedDataConfig, HookBypass, HookConfig, - HookManagerParams, HookParams, InfinitepushNamespace, InfinitepushParams, LfsParams, - PushParams, PushrebaseFlags, PushrebaseParams, RepoClientKnobs, SegmentedChangelogConfig, - ServiceWriteRestrictions, SourceControlServiceMonitoring, SourceControlServiceParams, - StorageConfig, UnodeVersion, WireprotoLoggingConfig, + CommitcloudBookmarksFillerMode, ComparableRegex, DerivedDataConfig, DerivedDataTypesConfig, + HookBypass, HookConfig, HookManagerParams, HookParams, InfinitepushNamespace, + InfinitepushParams, LfsParams, PushParams, PushrebaseFlags, PushrebaseParams, RepoClientKnobs, + SegmentedChangelogConfig, ServiceWriteRestrictions, SourceControlServiceMonitoring, + SourceControlServiceParams, StorageConfig, UnodeVersion, WireprotoLoggingConfig, }; use mononoke_types::{MPath, PrefixTrie}; use regex::Regex; use repos::{ RawBookmarkConfig, RawBundle2ReplayParams, RawCacheWarmupConfig, RawCommitcloudBookmarksFiller, - RawDerivedDataConfig, RawHookConfig, RawHookManagerParams, RawInfinitepushParams, RawLfsParams, - RawPushParams, RawPushrebaseParams, RawRepoClientKnobs, RawSegmentedChangelogConfig, - RawServiceWriteRestrictions, RawSourceControlServiceMonitoring, RawSourceControlServiceParams, - RawUnodeVersion, RawWireprotoLoggingConfig, + RawDerivedDataConfig, RawDerivedDataTypesConfig, RawHookConfig, RawHookManagerParams, + RawInfinitepushParams, RawLfsParams, RawPushParams, RawPushrebaseParams, RawRepoClientKnobs, + RawSegmentedChangelogConfig, RawServiceWriteRestrictions, RawSourceControlServiceMonitoring, + RawSourceControlServiceParams, RawWireprotoLoggingConfig, }; use crate::convert::Convert; @@ -367,29 +367,34 @@ impl Convert for RawSourceControlServiceMonitoring { } } +impl Convert for RawDerivedDataTypesConfig { + type Output = DerivedDataTypesConfig; + + fn convert(self) -> Result { + let types = self.types.into_iter().collect(); + let unode_version = match self.unode_version { + None => UnodeVersion::default(), + Some(1) => UnodeVersion::V1, + Some(2) => UnodeVersion::V2, + Some(version) => return Err(anyhow!("unknown unode version {}", version)), + }; + let blame_filesize_limit = self.blame_filesize_limit.map(|limit| limit as u64); + Ok(DerivedDataTypesConfig { + types, + unode_version, + blame_filesize_limit, + }) + } +} + impl Convert for RawDerivedDataConfig { type Output = DerivedDataConfig; fn convert(self) -> Result { - let unode_version = if let Some(unode_version) = self.raw_unode_version { - match unode_version { - RawUnodeVersion::unode_version_v1(_) => UnodeVersion::V1, - RawUnodeVersion::unode_version_v2(_) => UnodeVersion::V2, - RawUnodeVersion::UnknownField(field) => { - return Err(anyhow!("unknown unode version {}", field)); - } - } - } else { - UnodeVersion::default() - }; - Ok(DerivedDataConfig { scuba_table: self.scuba_table, - derived_data_types: self.derived_data_types.unwrap_or_default(), - unode_version, - override_blame_filesize_limit: self - .override_blame_filesize_limit - .map(|limit| limit as u64), + enabled: self.enabled.convert()?.unwrap_or_default(), + backfilling: self.backfilling.convert()?.unwrap_or_default(), }) } } diff --git a/eden/mononoke/metaconfig/types/src/lib.rs b/eden/mononoke/metaconfig/types/src/lib.rs index 5332f04dc8..635ef212c0 100644 --- a/eden/mononoke/metaconfig/types/src/lib.rs +++ b/eden/mononoke/metaconfig/types/src/lib.rs @@ -13,7 +13,7 @@ use anyhow::{anyhow, Error, Result}; use std::{ - collections::{BTreeSet, HashMap, HashSet}, + collections::{HashMap, HashSet}, fmt, mem, num::{NonZeroU64, NonZeroUsize}, ops::Deref, @@ -214,14 +214,33 @@ pub struct RepoClientKnobs { pub struct DerivedDataConfig { /// Name of scuba table where all derivation will be logged to pub scuba_table: Option, - /// Types of derived data that can be derived for this repo - pub derived_data_types: BTreeSet, - /// What unode version should be used (defaults to V1) + + /// Configuration for enabled derived data types. + pub enabled: DerivedDataTypesConfig, + + /// Configuration for backfilling derived data types. + pub backfilling: DerivedDataTypesConfig, +} + +impl DerivedDataConfig { + /// Returns whether the named derived data type is enabled. + pub fn is_enabled(&self, name: &str) -> bool { + self.enabled.types.contains(name) + } +} + +/// Config for derived data types +#[derive(Eq, Clone, Default, Debug, PartialEq)] +pub struct DerivedDataTypesConfig { + /// The configured types. + pub types: HashSet, + + /// What unode version should be used. Default: V1. pub unode_version: UnodeVersion, + /// Override the file size limit for blame. Blame won't be derived for files which - /// size is above the limit. NOTE: if `override_blame_filesize_limit` is None - /// then a default limit will be used! - pub override_blame_filesize_limit: Option, + /// size is above the limit. Default: `blame::BLAME_FILESIZE_LIMIT`. + pub blame_filesize_limit: Option, } /// What type of unode derived data to generate diff --git a/eden/mononoke/mononoke_api/Cargo.toml b/eden/mononoke/mononoke_api/Cargo.toml index 5de970c566..2957e48371 100644 --- a/eden/mononoke/mononoke_api/Cargo.toml +++ b/eden/mononoke/mononoke_api/Cargo.toml @@ -28,6 +28,7 @@ hook_manager_factory = { path = "../hooks/hook_manager_factory" } hooks = { path = "../hooks" } live_commit_sync_config = { path = "../commit_rewriting/live_commit_sync_config" } manifest = { path = "../manifest" } +mercurial_derived_data = { path = "../derived_data/mercurial_derived_data" } mercurial_types = { path = "../mercurial/types" } metaconfig_parser = { path = "../metaconfig/parser" } metaconfig_types = { path = "../metaconfig/types" } diff --git a/eden/mononoke/mononoke_api/src/repo.rs b/eden/mononoke/mononoke_api/src/repo.rs index 262088698a..7ba10009a8 100644 --- a/eden/mononoke/mononoke_api/src/repo.rs +++ b/eden/mononoke/mononoke_api/src/repo.rs @@ -36,6 +36,7 @@ use hooks::HookManager; use itertools::Itertools; use live_commit_sync_config::TestLiveCommitSyncConfig; use live_commit_sync_config::{CfgrLiveCommitSyncConfig, LiveCommitSyncConfig}; +use mercurial_derived_data::MappedHgChangesetId; use mercurial_types::Globalrev; use metaconfig_types::{CommonConfig, RepoConfig}; use metaconfig_types::{ @@ -717,15 +718,13 @@ impl RepoContext { pub fn derive_changeset_info_enabled(&self) -> bool { self.blob_repo() .get_derived_data_config() - .derived_data_types - .contains(ChangesetInfo::NAME) + .is_enabled(ChangesetInfo::NAME) } pub fn derive_hgchangesets_enabled(&self) -> bool { self.blob_repo() .get_derived_data_config() - .derived_data_types - .contains("hgchangesets") + .is_enabled(MappedHgChangesetId::NAME) } /// Look up a changeset specifier to find the canonical bonsai changeset diff --git a/eden/mononoke/repo_client/unbundle/src/resolver.rs b/eden/mononoke/repo_client/unbundle/src/resolver.rs index 75b1125266..8c3f62f169 100644 --- a/eden/mononoke/repo_client/unbundle/src/resolver.rs +++ b/eden/mononoke/repo_client/unbundle/src/resolver.rs @@ -1213,8 +1213,7 @@ impl<'r> Bundle2Resolver<'r> { let has_skeleton_manifests = self .repo .get_derived_data_config() - .derived_data_types - .contains(RootSkeletonManifestId::NAME); + .is_enabled(RootSkeletonManifestId::NAME); let skip_on_upload = tunables().get_skip_case_conflict_check_on_changeset_upload(); // If casefolding checks are enabled we need to check on upload if // skeleton manifests are disabled for this repo, or if skipping diff --git a/eden/mononoke/repo_import/src/main.rs b/eden/mononoke/repo_import/src/main.rs index 1722ef1f52..ac86534fbe 100644 --- a/eden/mononoke/repo_import/src/main.rs +++ b/eden/mononoke/repo_import/src/main.rs @@ -318,18 +318,16 @@ async fn derive_bonsais_single_repo( repo: &BlobRepo, bcs_ids: &[ChangesetId], ) -> Result<(), Error> { - let derived_data_types = &repo.get_derived_data_config().derived_data_types; + let derived_data_types = &repo.get_derived_data_config().enabled.types; - let len = derived_data_types.len(); - let mut derived_utils = vec![]; - for ty in derived_data_types { - let utils = derived_data_utils(repo, ty)?; - derived_utils.push(utils); - } + let derived_utils: Vec<_> = derived_data_types + .iter() + .map(|ty| derived_data_utils(repo, ty)) + .collect::>()?; stream::iter(derived_utils) .map(Ok) - .try_for_each_concurrent(len, |derived_util| async move { + .try_for_each_concurrent(derived_data_types.len(), |derived_util| async move { for csid in bcs_ids { derived_util .derive(ctx.clone(), repo.clone(), csid.clone()) diff --git a/eden/mononoke/repo_import/src/tests.rs b/eden/mononoke/repo_import/src/tests.rs index f1d62fd5f8..daf9657e88 100644 --- a/eden/mononoke/repo_import/src/tests.rs +++ b/eden/mononoke/repo_import/src/tests.rs @@ -74,7 +74,8 @@ mod tests { )?; repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| { derived_data_config - .derived_data_types + .enabled + .types .remove(&TreeHandle::NAME.to_string()); derived_data_config }); @@ -765,7 +766,7 @@ mod tests { repo: &BlobRepo, cs_ids: &[ChangesetId], ) -> Result<()> { - let derived_data_types = &repo.get_derived_data_config().derived_data_types; + let derived_data_types = &repo.get_derived_data_config().enabled.types; for derived_data_type in derived_data_types { let derived_utils = derived_data_utils(repo, derived_data_type)?; diff --git a/eden/mononoke/server/repo_listener/src/repo_handlers.rs b/eden/mononoke/server/repo_listener/src/repo_handlers.rs index 39dcc6a273..24a48063e6 100644 --- a/eden/mononoke/server/repo_listener/src/repo_handlers.rs +++ b/eden/mononoke/server/repo_listener/src/repo_handlers.rs @@ -26,7 +26,7 @@ use futures::{ use futures_ext::{try_boxfuture, BoxFuture, FutureExt}; use futures_old::{future, Future}; use hook_manager_factory::make_hook_manager; -use maplit::btreeset; +use maplit::hashset; use mercurial_derived_data::MappedHgChangesetId; use metaconfig_types::{ CensoredScubaParams, CommitSyncConfig, RepoClientKnobs, RepoConfig, WireprotoLoggingConfig, @@ -282,7 +282,7 @@ pub fn repo_handlers( let mut warm_bookmarks_cache_builder = WarmBookmarksCacheBuilder::new(&ctx, &blobrepo); warm_bookmarks_cache_builder.add_derived_data_warmers( - &btreeset! { MappedHgChangesetId::NAME.to_string() }, + &hashset! { MappedHgChangesetId::NAME.to_string() }, )?; if warm_bookmark_cache_check_blobimport { let mutable_counters = diff --git a/eden/mononoke/tests/integration/library.sh b/eden/mononoke/tests/integration/library.sh index ebd059500b..22e8abf1d5 100644 --- a/eden/mononoke/tests/integration/library.sh +++ b/eden/mononoke/tests/integration/library.sh @@ -908,13 +908,13 @@ write_infinitepush_config "$reponame" if [[ -n "${ENABLED_DERIVED_DATA:-}" ]]; then cat >> "repos/$reponame/server.toml" <> "repos/$reponame/server.toml" <( // Only walk derived node types that the repo is configured to contain include_node_types.retain(|t| { if let Some(t) = t.derived_data_name() { - config.derived_data_config.derived_data_types.contains(t) + config.derived_data_config.is_enabled(t) } else { true }