mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 00:45:18 +03:00
derived_data: use new derived data configuration format
Summary: Change derived data config to have "enabled" config and "backfilling" config. The `Mapping` object has the responsibility of encapsulating the configuration options for the derived data type. Since it is only possible to obtain a `Mapping` from appropriate configuration, ownership of a `Mapping` means derivation is permitted, and so the `DeriveMode` enum is removed. Most callers will use `BonsaiDerived::derive`, or a default `derived_data_utils` implementation that requires the derived data to be enabled and configured on the repo. Backfillers can additionally use `derived_data_utils_for_backfill` which will use the `backfilling` configuration in preference to the default configuration. Reviewed By: ahornby Differential Revision: D25246317 fbshipit-source-id: 352fe6509572409bc3338dd43d157f34c73b9eac
This commit is contained in:
parent
05225bcf90
commit
73cdac45e3
@ -36,13 +36,13 @@ use filestore::FilestoreConfig;
|
||||
use fsnodes::RootFsnodeId;
|
||||
use futures::{compat::Future01CompatExt, future, try_join};
|
||||
use git_types::TreeHandle;
|
||||
use maplit::btreeset;
|
||||
use maplit::hashset;
|
||||
use memblob::Memblob;
|
||||
use mercurial_derived_data::MappedHgChangesetId;
|
||||
use mercurial_mutation::{HgMutationStore, SqlHgMutationStoreBuilder};
|
||||
use metaconfig_types::{
|
||||
self, CensoredScubaParams, DerivedDataConfig, FilestoreParams, Redaction, RepoConfig,
|
||||
SegmentedChangelogConfig, StorageConfig, UnodeVersion,
|
||||
self, CensoredScubaParams, DerivedDataConfig, DerivedDataTypesConfig, FilestoreParams,
|
||||
Redaction, RepoConfig, SegmentedChangelogConfig, StorageConfig, UnodeVersion,
|
||||
};
|
||||
use mononoke_types::RepositoryId;
|
||||
use newfilenodes::NewFilenodesBuilder;
|
||||
@ -418,20 +418,23 @@ pub fn new_memblob_empty_with_id(
|
||||
pub fn init_all_derived_data() -> DerivedDataConfig {
|
||||
DerivedDataConfig {
|
||||
scuba_table: None,
|
||||
derived_data_types: btreeset! {
|
||||
BlameRoot::NAME.to_string(),
|
||||
FilenodesOnlyPublic::NAME.to_string(),
|
||||
ChangesetInfo::NAME.to_string(),
|
||||
RootFastlog::NAME.to_string(),
|
||||
RootFsnodeId::NAME.to_string(),
|
||||
RootSkeletonManifestId::NAME.to_string(),
|
||||
RootDeletedManifestId::NAME.to_string(),
|
||||
RootUnodeManifestId::NAME.to_string(),
|
||||
TreeHandle::NAME.to_string(),
|
||||
MappedHgChangesetId::NAME.to_string(),
|
||||
enabled: DerivedDataTypesConfig {
|
||||
types: hashset! {
|
||||
BlameRoot::NAME.to_string(),
|
||||
FilenodesOnlyPublic::NAME.to_string(),
|
||||
ChangesetInfo::NAME.to_string(),
|
||||
RootFastlog::NAME.to_string(),
|
||||
RootFsnodeId::NAME.to_string(),
|
||||
RootSkeletonManifestId::NAME.to_string(),
|
||||
RootDeletedManifestId::NAME.to_string(),
|
||||
RootUnodeManifestId::NAME.to_string(),
|
||||
TreeHandle::NAME.to_string(),
|
||||
MappedHgChangesetId::NAME.to_string(),
|
||||
},
|
||||
unode_version: UnodeVersion::V2,
|
||||
..Default::default()
|
||||
},
|
||||
unode_version: UnodeVersion::V2,
|
||||
override_blame_filesize_limit: None,
|
||||
backfilling: DerivedDataTypesConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,8 +297,7 @@ impl AffectedChangesets {
|
||||
&& tunables().get_check_case_conflicts_on_bookmark_movement()
|
||||
&& repo
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.contains(RootSkeletonManifestId::NAME)
|
||||
.is_enabled(RootSkeletonManifestId::NAME)
|
||||
{
|
||||
self.load_additional_changesets(
|
||||
ctx,
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#![deny(warnings)]
|
||||
|
||||
use std::collections::{BTreeSet, HashMap, VecDeque};
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::time::Duration;
|
||||
|
||||
@ -92,13 +92,13 @@ impl<'a> WarmBookmarksCacheBuilder<'a> {
|
||||
}
|
||||
|
||||
pub fn add_all_derived_data_warmers(&mut self) -> Result<(), Error> {
|
||||
self.add_derived_data_warmers(&self.repo.get_derived_data_config().derived_data_types)
|
||||
self.add_derived_data_warmers(&self.repo.get_derived_data_config().enabled.types)
|
||||
}
|
||||
|
||||
pub fn add_derived_data_warmers(&mut self, types: &BTreeSet<String>) -> Result<(), Error> {
|
||||
let derived_data_types = &self.repo.get_derived_data_config().derived_data_types;
|
||||
pub fn add_derived_data_warmers(&mut self, types: &HashSet<String>) -> Result<(), Error> {
|
||||
let config = &self.repo.get_derived_data_config();
|
||||
for ty in types {
|
||||
if !derived_data_types.contains(ty) {
|
||||
if !config.is_enabled(ty) {
|
||||
return Err(anyhow!("{} is not enabled for {}", ty, self.repo.name()));
|
||||
}
|
||||
}
|
||||
|
@ -26,7 +26,8 @@ use cmdlib::{
|
||||
use context::CoreContext;
|
||||
use derived_data::BonsaiDerivable;
|
||||
use derived_data_utils::{
|
||||
derived_data_utils, derived_data_utils_unsafe, DerivedUtils, ThinOut, POSSIBLE_DERIVED_TYPES,
|
||||
derived_data_utils, derived_data_utils_for_backfill, DerivedUtils, ThinOut,
|
||||
POSSIBLE_DERIVED_TYPES,
|
||||
};
|
||||
use fbinit::FacebookInit;
|
||||
use fsnodes::RootFsnodeId;
|
||||
@ -36,12 +37,11 @@ use futures::{
|
||||
stream::{self, StreamExt, TryStreamExt},
|
||||
};
|
||||
use futures_stats::TimedFutureExt;
|
||||
use metaconfig_types::DerivedDataConfig;
|
||||
use mononoke_types::{ChangesetId, DateTime};
|
||||
use slog::{info, Logger};
|
||||
use stats::prelude::*;
|
||||
use std::{
|
||||
collections::{BTreeSet, HashMap},
|
||||
collections::{BTreeSet, HashMap, HashSet},
|
||||
fs,
|
||||
path::Path,
|
||||
sync::Arc,
|
||||
@ -268,7 +268,10 @@ async fn run_subcmd<'a>(
|
||||
(SUBCOMMAND_BACKFILL_ALL, Some(sub_m)) => {
|
||||
let repo = args::open_repo_unredacted(fb, logger, matches).await?;
|
||||
let derived_data_types = sub_m.values_of(ARG_DERIVED_DATA_TYPE).map_or_else(
|
||||
|| repo.get_derived_data_config().derived_data_types.clone(),
|
||||
|| {
|
||||
let config = repo.get_derived_data_config();
|
||||
&config.enabled.types | &config.backfilling.types
|
||||
},
|
||||
|names| names.map(ToString::to_string).collect(),
|
||||
);
|
||||
subcommand_backfill_all(&ctx, &repo, derived_data_types).await
|
||||
@ -299,19 +302,9 @@ async fn run_subcmd<'a>(
|
||||
.map(|limit| limit.parse::<usize>())
|
||||
.transpose()?;
|
||||
|
||||
let repo =
|
||||
let mut repo =
|
||||
open_repo_maybe_unredacted(fb, &logger, &matches, &derived_data_type).await?;
|
||||
|
||||
// Backfill is used when when a derived data type is not enabled yet, and so
|
||||
// any attempt to call BonsaiDerived::derive() fails. However calling
|
||||
// BonsaiDerived::derive() might be useful, and so the lines below explicitly
|
||||
// enable `derived_data_type` to allow calling BonsaiDerived::derive() if necessary.
|
||||
let mut repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config
|
||||
.derived_data_types
|
||||
.insert(derived_data_type.clone());
|
||||
derived_data_config
|
||||
});
|
||||
info!(
|
||||
ctx.logger(),
|
||||
"reading all changesets for: {:?}",
|
||||
@ -359,7 +352,7 @@ async fn run_subcmd<'a>(
|
||||
subcommand_backfill(
|
||||
&ctx,
|
||||
&repo,
|
||||
&derived_data_type,
|
||||
derived_data_type.as_str(),
|
||||
regenerate,
|
||||
changesets,
|
||||
cleaner,
|
||||
@ -406,9 +399,10 @@ async fn run_subcmd<'a>(
|
||||
let repo = args::open_repo_unredacted(fb, logger, matches).await?;
|
||||
let types = repo
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.clone()
|
||||
.into_iter()
|
||||
.enabled
|
||||
.types
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
(repo, types)
|
||||
}
|
||||
@ -450,12 +444,12 @@ fn parse_serialized_commits<P: AsRef<Path>>(file: P) -> Result<Vec<ChangesetEntr
|
||||
async fn subcommand_backfill_all(
|
||||
ctx: &CoreContext,
|
||||
repo: &BlobRepo,
|
||||
derived_data_types: BTreeSet<String>,
|
||||
derived_data_types: HashSet<String>,
|
||||
) -> Result<()> {
|
||||
info!(ctx.logger(), "derived data types: {:?}", derived_data_types);
|
||||
let derivers = derived_data_types
|
||||
.iter()
|
||||
.map(|name| derived_data_utils_unsafe(repo, name.as_str()))
|
||||
.map(|name| derived_data_utils_for_backfill(repo, name.as_str()))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
tail_batch_iteration(ctx, repo, derivers.as_ref()).await
|
||||
}
|
||||
@ -467,12 +461,12 @@ fn truncate_duration(duration: Duration) -> Duration {
|
||||
async fn subcommand_backfill(
|
||||
ctx: &CoreContext,
|
||||
repo: &BlobRepo,
|
||||
derived_data_type: &String,
|
||||
derived_data_type: &str,
|
||||
regenerate: bool,
|
||||
changesets: Vec<ChangesetId>,
|
||||
mut cleaner: Option<impl dry_run::Cleaner>,
|
||||
) -> Result<()> {
|
||||
let derived_utils = &derived_data_utils_unsafe(repo, derived_data_type.as_str())?;
|
||||
let derived_utils = &derived_data_utils_for_backfill(repo, derived_data_type)?;
|
||||
|
||||
info!(
|
||||
ctx.logger(),
|
||||
@ -571,9 +565,9 @@ async fn subcommand_tail(
|
||||
|
||||
let derive_utils: Vec<Arc<dyn DerivedUtils>> = unredacted_repo
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.clone()
|
||||
.into_iter()
|
||||
.enabled
|
||||
.types
|
||||
.iter()
|
||||
.map(|name| derived_data_utils(&unredacted_repo, name))
|
||||
.collect::<Result<_>>()?;
|
||||
slog::info!(
|
||||
|
@ -14,6 +14,7 @@ use cloned::cloned;
|
||||
use context::CoreContext;
|
||||
use derived_data::{
|
||||
impl_bonsai_derived_mapping, BlobstoreExistsMapping, BonsaiDerivable, BonsaiDerived,
|
||||
DerivedDataTypesConfig,
|
||||
};
|
||||
use filestore::{self, FetchKey};
|
||||
use futures::{future, StreamExt, TryFutureExt, TryStreamExt};
|
||||
@ -119,13 +120,9 @@ pub struct BlameRootMapping {
|
||||
impl BlobstoreExistsMapping for BlameRootMapping {
|
||||
type Value = BlameRoot;
|
||||
|
||||
fn new(repo: &BlobRepo) -> Result<Self> {
|
||||
let options = BlameDeriveOptions {
|
||||
filesize_limit: repo
|
||||
.get_derived_data_config()
|
||||
.override_blame_filesize_limit
|
||||
.unwrap_or(BLAME_FILESIZE_LIMIT),
|
||||
};
|
||||
fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result<Self> {
|
||||
let filesize_limit = config.blame_filesize_limit.unwrap_or(BLAME_FILESIZE_LIMIT);
|
||||
let options = BlameDeriveOptions { filesize_limit };
|
||||
Ok(Self {
|
||||
blobstore: repo.get_blobstore().boxed(),
|
||||
options,
|
||||
|
@ -9,7 +9,7 @@
|
||||
#![type_length_limit = "1441792"]
|
||||
|
||||
mod derived;
|
||||
pub use derived::{fetch_file_full_content, BlameRoot, BlameRootMapping};
|
||||
pub use derived::{fetch_file_full_content, BlameRoot, BlameRootMapping, BLAME_FILESIZE_LIMIT};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
@ -247,7 +247,7 @@ async fn test_blame_file_size_limit_rejected(fb: FacebookInit) -> Result<(), Err
|
||||
fetch_blame(ctx, repo, c1, MPath::new(file1)?).await?;
|
||||
|
||||
let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config.override_blame_filesize_limit = Some(4);
|
||||
derived_data_config.enabled.blame_filesize_limit = Some(4);
|
||||
derived_data_config
|
||||
});
|
||||
|
||||
|
@ -12,7 +12,9 @@ use async_trait::async_trait;
|
||||
use blobrepo::BlobRepo;
|
||||
use blobstore::Blobstore;
|
||||
use context::CoreContext;
|
||||
use derived_data::{impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable};
|
||||
use derived_data::{
|
||||
impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, DerivedDataTypesConfig,
|
||||
};
|
||||
use mononoke_types::BonsaiChangeset;
|
||||
|
||||
use crate::ChangesetInfo;
|
||||
@ -44,7 +46,7 @@ pub struct ChangesetInfoMapping {
|
||||
impl BlobstoreRootIdMapping for ChangesetInfoMapping {
|
||||
type Value = ChangesetInfo;
|
||||
|
||||
fn new(repo: &BlobRepo) -> Result<Self> {
|
||||
fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result<Self> {
|
||||
Ok(Self {
|
||||
blobstore: repo.get_blobstore().boxed(),
|
||||
})
|
||||
|
@ -12,7 +12,9 @@ use blobrepo::BlobRepo;
|
||||
use blobstore::{Blobstore, BlobstoreGetData};
|
||||
use bytes::Bytes;
|
||||
use context::CoreContext;
|
||||
use derived_data::{impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable};
|
||||
use derived_data::{
|
||||
impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, DerivedDataTypesConfig,
|
||||
};
|
||||
use mononoke_types::{BlobstoreBytes, BonsaiChangeset, DeletedManifestId};
|
||||
use repo_blobstore::RepoBlobstore;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
@ -85,7 +87,7 @@ pub struct RootDeletedManifestMapping {
|
||||
impl BlobstoreRootIdMapping for RootDeletedManifestMapping {
|
||||
type Value = RootDeletedManifestId;
|
||||
|
||||
fn new(repo: &BlobRepo) -> Result<Self> {
|
||||
fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result<Self> {
|
||||
Ok(Self {
|
||||
blobstore: repo.get_blobstore(),
|
||||
})
|
||||
|
@ -64,8 +64,7 @@ pub async fn resolve_path_state(
|
||||
|
||||
let use_deleted_manifest = repo
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.contains(RootDeletedManifestId::NAME);
|
||||
.is_enabled(RootDeletedManifestId::NAME);
|
||||
if !use_deleted_manifest {
|
||||
return Ok(None);
|
||||
}
|
||||
|
@ -13,6 +13,7 @@ use cloned::cloned;
|
||||
use context::CoreContext;
|
||||
use derived_data::{
|
||||
impl_bonsai_derived_mapping, BlobstoreExistsMapping, BonsaiDerivable, BonsaiDerived,
|
||||
DerivedDataTypesConfig,
|
||||
};
|
||||
use futures::future;
|
||||
use futures::stream::TryStreamExt;
|
||||
@ -144,7 +145,7 @@ pub struct RootFastlogMapping {
|
||||
impl BlobstoreExistsMapping for RootFastlogMapping {
|
||||
type Value = RootFastlog;
|
||||
|
||||
fn new(repo: &BlobRepo) -> Result<Self> {
|
||||
fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result<Self> {
|
||||
Ok(Self {
|
||||
blobstore: repo.get_blobstore().boxed(),
|
||||
})
|
||||
|
@ -14,7 +14,9 @@ use blobrepo_hg::BlobRepoHg;
|
||||
use blobstore::Loadable;
|
||||
use borrowed::borrowed;
|
||||
use context::CoreContext;
|
||||
use derived_data::{BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError};
|
||||
use derived_data::{
|
||||
BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig,
|
||||
};
|
||||
use filenodes::{FilenodeInfo, FilenodeResult, PreparedFilenode};
|
||||
use futures::{
|
||||
compat::Future01CompatExt, future::try_join_all, pin_mut, stream, FutureExt, StreamExt,
|
||||
@ -307,7 +309,7 @@ pub struct FilenodesOnlyPublicMapping {
|
||||
}
|
||||
|
||||
impl FilenodesOnlyPublicMapping {
|
||||
pub fn new(repo: &BlobRepo) -> Result<Self, DeriveError> {
|
||||
pub fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result<Self, DeriveError> {
|
||||
Ok(Self { repo: repo.clone() })
|
||||
}
|
||||
}
|
||||
@ -320,7 +322,8 @@ impl BonsaiDerived for FilenodesOnlyPublic {
|
||||
_ctx: &CoreContext,
|
||||
repo: &BlobRepo,
|
||||
) -> Result<Self::DefaultMapping, DeriveError> {
|
||||
FilenodesOnlyPublicMapping::new(repo)
|
||||
let config = derived_data::enabled_type_config(repo, Self::NAME)?;
|
||||
FilenodesOnlyPublicMapping::new(repo, config)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@ use bytes::Bytes;
|
||||
use context::CoreContext;
|
||||
use derived_data::{
|
||||
impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, BonsaiDerivedMapping,
|
||||
DeriveMode,
|
||||
DerivedDataTypesConfig,
|
||||
};
|
||||
use futures::stream::{self, StreamExt, TryStreamExt};
|
||||
use mononoke_types::{
|
||||
@ -92,13 +92,10 @@ impl BonsaiDerivable for RootFsnodeId {
|
||||
repo: &BlobRepo,
|
||||
csids: Vec<ChangesetId>,
|
||||
mapping: &BatchMapping,
|
||||
mode: DeriveMode,
|
||||
) -> Result<HashMap<ChangesetId, Self>, Error>
|
||||
where
|
||||
BatchMapping: BonsaiDerivedMapping<Value = Self> + Send + Sync + Clone + 'static,
|
||||
{
|
||||
mode.check_if_derive_allowed::<Self>(repo)?;
|
||||
|
||||
let derived = derive_fsnode_in_batch(ctx, repo, csids.clone()).await?;
|
||||
|
||||
stream::iter(derived.into_iter().map(|(cs_id, derived)| async move {
|
||||
@ -123,7 +120,7 @@ pub struct RootFsnodeMapping {
|
||||
impl BlobstoreRootIdMapping for RootFsnodeMapping {
|
||||
type Value = RootFsnodeId;
|
||||
|
||||
fn new(repo: &BlobRepo) -> Result<Self> {
|
||||
fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result<Self> {
|
||||
Ok(Self {
|
||||
blobstore: repo.get_blobstore(),
|
||||
})
|
||||
|
@ -16,7 +16,9 @@ use mononoke_types::{BonsaiChangeset, ChangesetId, RepositoryId};
|
||||
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use derived_data::{BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError};
|
||||
use derived_data::{
|
||||
BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
||||
pub struct MappedHgChangesetId(pub HgChangesetId);
|
||||
@ -45,11 +47,11 @@ pub struct HgChangesetIdMapping {
|
||||
}
|
||||
|
||||
impl HgChangesetIdMapping {
|
||||
pub fn new(repo: &BlobRepo) -> Self {
|
||||
Self {
|
||||
pub fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result<Self, DeriveError> {
|
||||
Ok(Self {
|
||||
repo_id: repo.get_repoid(),
|
||||
mapping: repo.attribute_expected::<dyn BonsaiHgMapping>().clone(),
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -99,6 +101,7 @@ impl BonsaiDerived for MappedHgChangesetId {
|
||||
_ctx: &CoreContext,
|
||||
repo: &BlobRepo,
|
||||
) -> Result<Self::DefaultMapping, DeriveError> {
|
||||
Ok(HgChangesetIdMapping::new(repo))
|
||||
let config = derived_data::enabled_type_config(repo, Self::NAME)?;
|
||||
Ok(HgChangesetIdMapping::new(repo, config)?)
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ use bytes::Bytes;
|
||||
use context::CoreContext;
|
||||
use derived_data::{
|
||||
impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, BonsaiDerivedMapping,
|
||||
DeriveMode,
|
||||
DerivedDataTypesConfig,
|
||||
};
|
||||
use futures::stream::{self, StreamExt, TryStreamExt};
|
||||
use mononoke_types::{
|
||||
@ -94,13 +94,10 @@ impl BonsaiDerivable for RootSkeletonManifestId {
|
||||
repo: &BlobRepo,
|
||||
csids: Vec<ChangesetId>,
|
||||
mapping: &BatchMapping,
|
||||
mode: DeriveMode,
|
||||
) -> Result<HashMap<ChangesetId, Self>, Error>
|
||||
where
|
||||
BatchMapping: BonsaiDerivedMapping<Value = Self> + Send + Sync + Clone + 'static,
|
||||
{
|
||||
mode.check_if_derive_allowed::<Self>(repo)?;
|
||||
|
||||
let derived = derive_skeleton_manifests_in_batch(ctx, repo, csids.clone()).await?;
|
||||
|
||||
stream::iter(derived.into_iter().map(|(cs_id, derived)| async move {
|
||||
@ -125,7 +122,7 @@ pub struct RootSkeletonManifestMapping {
|
||||
impl BlobstoreRootIdMapping for RootSkeletonManifestMapping {
|
||||
type Value = RootSkeletonManifestId;
|
||||
|
||||
fn new(repo: &BlobRepo) -> Result<Self> {
|
||||
fn new(repo: &BlobRepo, _config: &DerivedDataTypesConfig) -> Result<Self> {
|
||||
Ok(Self {
|
||||
blobstore: repo.get_blobstore(),
|
||||
})
|
||||
|
@ -18,7 +18,7 @@ use futures::{
|
||||
TryStreamExt,
|
||||
};
|
||||
use futures_stats::{futures03::TimedFutureExt, FutureStats};
|
||||
use metaconfig_types::DerivedDataConfig;
|
||||
use metaconfig_types::{DerivedDataConfig, DerivedDataTypesConfig};
|
||||
use mononoke_types::ChangesetId;
|
||||
use scuba_ext::MononokeScubaSampleBuilder;
|
||||
use slog::debug;
|
||||
@ -44,40 +44,23 @@ define_stats! {
|
||||
|
||||
const LEASE_WARNING_THRESHOLD: Duration = Duration::from_secs(60);
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||
pub enum DeriveMode {
|
||||
/// This mode should almost always be preferred
|
||||
OnlyIfEnabled,
|
||||
/// This mode should rarely be used, perhaps only for backfilling type of derived data
|
||||
/// which is not enabled in this repo yet
|
||||
Unsafe,
|
||||
}
|
||||
|
||||
impl DeriveMode {
|
||||
/// Check that derivation is suitable for a particular derived data type
|
||||
/// in this mode.
|
||||
///
|
||||
/// Returns `DeriveError` if derivation should not proceed.
|
||||
pub fn check_if_derive_allowed<Derivable: BonsaiDerivable>(
|
||||
self,
|
||||
repo: &BlobRepo,
|
||||
) -> Result<(), DeriveError> {
|
||||
if self == DeriveMode::OnlyIfEnabled {
|
||||
if !repo
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.contains(Derivable::NAME)
|
||||
{
|
||||
STATS::derived_data_disabled
|
||||
.add_value(1, (repo.get_repoid().id(), Derivable::NAME));
|
||||
return Err(DeriveError::Disabled(
|
||||
Derivable::NAME,
|
||||
repo.get_repoid(),
|
||||
repo.name().clone(),
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
/// Checks that the named derived data type is enabled, and returns the
|
||||
/// enabled derived data types config if it is. Returns an error if the
|
||||
/// derived data type is not enabled.
|
||||
pub fn enabled_type_config<'repo>(
|
||||
repo: &'repo BlobRepo,
|
||||
name: &'static str,
|
||||
) -> Result<&'repo DerivedDataTypesConfig, DeriveError> {
|
||||
let config = repo.get_derived_data_config();
|
||||
if config.enabled.types.contains(name) {
|
||||
Ok(&config.enabled)
|
||||
} else {
|
||||
STATS::derived_data_disabled.add_value(1, (repo.get_repoid().id(), name));
|
||||
Err(DeriveError::Disabled(
|
||||
name,
|
||||
repo.get_repoid(),
|
||||
repo.name().clone(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@ -114,12 +97,10 @@ pub async fn derive_impl<
|
||||
repo: &BlobRepo,
|
||||
derived_mapping: &Mapping,
|
||||
start_csid: ChangesetId,
|
||||
mode: DeriveMode,
|
||||
) -> Result<Derivable, DeriveError> {
|
||||
let derivation = async {
|
||||
let all_csids =
|
||||
find_topo_sorted_underived(ctx, repo, derived_mapping, Some(start_csid), None, mode)
|
||||
.await?;
|
||||
find_topo_sorted_underived(ctx, repo, derived_mapping, Some(start_csid), None).await?;
|
||||
|
||||
for csid in &all_csids {
|
||||
ctx.scuba().clone().log_with_msg(
|
||||
@ -200,10 +181,7 @@ pub(crate) async fn find_topo_sorted_underived<
|
||||
derived_mapping: &Mapping,
|
||||
start_csids: Changesets,
|
||||
limit: Option<u64>,
|
||||
mode: DeriveMode,
|
||||
) -> Result<Vec<ChangesetId>, Error> {
|
||||
mode.check_if_derive_allowed::<Derivable>(repo)?;
|
||||
|
||||
let changeset_fetcher = repo.get_changeset_fetcher();
|
||||
// This is necessary to avoid visiting the same commit a lot of times in mergy repos
|
||||
let visited: Arc<Mutex<HashSet<ChangesetId>>> = Arc::new(Mutex::new(HashSet::new()));
|
||||
@ -703,7 +681,8 @@ mod test {
|
||||
async fn derive_for_master(ctx: CoreContext, repo: BlobRepo) {
|
||||
let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config
|
||||
.derived_data_types
|
||||
.enabled
|
||||
.types
|
||||
.insert(TestGenNum::NAME.to_string());
|
||||
derived_data_config
|
||||
});
|
||||
@ -748,7 +727,8 @@ mod test {
|
||||
.await
|
||||
.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config
|
||||
.derived_data_types
|
||||
.enabled
|
||||
.types
|
||||
.insert(TestGenNum::NAME.to_string());
|
||||
derived_data_config
|
||||
})
|
||||
@ -860,7 +840,8 @@ mod test {
|
||||
let repo = linear::getrepo(fb).await;
|
||||
let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config
|
||||
.derived_data_types
|
||||
.enabled
|
||||
.types
|
||||
.insert(TestGenNum::NAME.to_string());
|
||||
derived_data_config
|
||||
});
|
||||
@ -874,8 +855,7 @@ mod test {
|
||||
// Reverse them to derive parents before children
|
||||
let cs_ids = cs_ids.clone().into_iter().rev().collect::<Vec<_>>();
|
||||
let mapping = TestGenNum::default_mapping(&ctx, &repo)?;
|
||||
let derived_batch =
|
||||
TestGenNum::batch_derive(&ctx, &repo, cs_ids, &mapping, DeriveMode::Unsafe).await?;
|
||||
let derived_batch = TestGenNum::batch_derive(&ctx, &repo, cs_ids, &mapping).await?;
|
||||
derived_batch
|
||||
.get(&master_cs_id)
|
||||
.unwrap_or_else(|| panic!("{} has not been derived", master_cs_id))
|
||||
@ -886,7 +866,8 @@ mod test {
|
||||
let repo = linear::getrepo(fb).await;
|
||||
let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config
|
||||
.derived_data_types
|
||||
.enabled
|
||||
.types
|
||||
.insert(TestGenNum::NAME.to_string());
|
||||
derived_data_config
|
||||
});
|
||||
|
@ -24,9 +24,9 @@ pub mod batch;
|
||||
pub mod derive_impl;
|
||||
pub mod mapping_impl;
|
||||
|
||||
pub use derive_impl::enabled_type_config;
|
||||
pub use mapping_impl::{BlobstoreExistsMapping, BlobstoreRootIdMapping};
|
||||
|
||||
pub use crate::derive_impl::DeriveMode;
|
||||
pub use metaconfig_types::DerivedDataTypesConfig;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum DeriveError {
|
||||
@ -72,7 +72,6 @@ pub trait BonsaiDerivable: Sized + 'static + Send + Sync + Clone {
|
||||
repo: &BlobRepo,
|
||||
csids: Vec<ChangesetId>,
|
||||
mapping: &Mapping,
|
||||
mode: DeriveMode,
|
||||
) -> Result<HashMap<ChangesetId, Self>, Error>
|
||||
where
|
||||
Mapping: BonsaiDerivedMapping<Value = Self> + Send + Sync + Clone + 'static,
|
||||
@ -83,7 +82,7 @@ pub trait BonsaiDerivable: Sized + 'static + Send + Sync + Clone {
|
||||
// cause O(n^2) derivations.
|
||||
for csid in csids {
|
||||
let derived =
|
||||
derive_impl::derive_impl::<Self, Mapping>(ctx, repo, mapping, csid, mode).await?;
|
||||
derive_impl::derive_impl::<Self, Mapping>(ctx, repo, mapping, csid).await?;
|
||||
res.insert(csid, derived);
|
||||
}
|
||||
Ok(res)
|
||||
@ -120,14 +119,7 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone + BonsaiDerivable
|
||||
csid: ChangesetId,
|
||||
) -> Result<Self, DeriveError> {
|
||||
let mapping = Self::default_mapping(&ctx, &repo)?;
|
||||
derive_impl::derive_impl::<Self, Self::DefaultMapping>(
|
||||
ctx,
|
||||
repo,
|
||||
&mapping,
|
||||
csid,
|
||||
DeriveMode::OnlyIfEnabled,
|
||||
)
|
||||
.await
|
||||
derive_impl::derive_impl::<Self, Self::DefaultMapping>(ctx, repo, &mapping, csid).await
|
||||
}
|
||||
|
||||
/// Fetch the derived data in cases where we might not want to trigger derivation, e.g. when scrubbing.
|
||||
@ -156,7 +148,6 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone + BonsaiDerivable
|
||||
&mapping,
|
||||
Some(*csid),
|
||||
Some(limit),
|
||||
DeriveMode::OnlyIfEnabled,
|
||||
)
|
||||
.await?;
|
||||
Ok(underived.len() as u64)
|
||||
@ -173,12 +164,7 @@ pub trait BonsaiDerived: Sized + 'static + Send + Sync + Clone + BonsaiDerivable
|
||||
) -> Result<Vec<ChangesetId>, DeriveError> {
|
||||
let mapping = Self::default_mapping(&ctx, &repo)?;
|
||||
let underived = derive_impl::find_topo_sorted_underived::<Self, Self::DefaultMapping, _>(
|
||||
ctx,
|
||||
repo,
|
||||
&mapping,
|
||||
csids,
|
||||
None,
|
||||
DeriveMode::OnlyIfEnabled,
|
||||
ctx, repo, &mapping, csids, None,
|
||||
)
|
||||
.await?;
|
||||
Ok(underived)
|
||||
|
@ -14,6 +14,7 @@ use blobrepo::BlobRepo;
|
||||
use blobstore::{Blobstore, BlobstoreBytes, BlobstoreGetData};
|
||||
use context::CoreContext;
|
||||
use futures::stream::{FuturesUnordered, TryStreamExt};
|
||||
use metaconfig_types::DerivedDataTypesConfig;
|
||||
use mononoke_types::ChangesetId;
|
||||
|
||||
use crate::{BonsaiDerivable, BonsaiDerived};
|
||||
@ -31,7 +32,7 @@ pub trait BlobstoreRootIdMapping {
|
||||
+ Sized;
|
||||
|
||||
/// Create a new instance of this mapping.
|
||||
fn new(repo: &BlobRepo) -> Result<Self>
|
||||
fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result<Self>
|
||||
where
|
||||
Self: Sized;
|
||||
|
||||
@ -92,7 +93,7 @@ pub trait BlobstoreExistsMapping {
|
||||
type Value: BonsaiDerived + From<ChangesetId> + Send + Sync + Clone;
|
||||
|
||||
/// Create a new instance of this mapping.
|
||||
fn new(repo: &BlobRepo) -> Result<Self>
|
||||
fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result<Self>
|
||||
where
|
||||
Self: Sized;
|
||||
|
||||
@ -212,7 +213,8 @@ macro_rules! impl_bonsai_derived_mapping {
|
||||
_ctx: &::context::CoreContext,
|
||||
repo: &::blobrepo::BlobRepo,
|
||||
) -> ::std::result::Result<Self::DefaultMapping, $crate::DeriveError> {
|
||||
::std::result::Result::Ok(<$mapping as $mapping_impl>::new(repo)?)
|
||||
let config = $crate::derive_impl::enabled_type_config(repo, Self::NAME)?;
|
||||
::std::result::Result::Ok(<$mapping as $mapping_impl>::new(repo, config)?)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -644,7 +644,7 @@ mod tests {
|
||||
|
||||
// Unodes v1 should create a new one that points to the parent unode
|
||||
let repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config.unode_version = UnodeVersion::V1;
|
||||
derived_data_config.enabled.unode_version = UnodeVersion::V1;
|
||||
derived_data_config
|
||||
});
|
||||
let (p1_unodes, merge_unodes) = find_unodes(ctx.clone(), repo.clone()).await?;
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#![deny(warnings)]
|
||||
|
||||
use anyhow::Error;
|
||||
use anyhow::{Error, Result};
|
||||
use blobrepo::BlobRepo;
|
||||
use cloned::cloned;
|
||||
use context::CoreContext;
|
||||
|
@ -12,7 +12,9 @@ use blobrepo::BlobRepo;
|
||||
use blobstore::{Blobstore, BlobstoreGetData};
|
||||
use bytes::Bytes;
|
||||
use context::CoreContext;
|
||||
use derived_data::{impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable};
|
||||
use derived_data::{
|
||||
impl_bonsai_derived_mapping, BlobstoreRootIdMapping, BonsaiDerivable, DerivedDataTypesConfig,
|
||||
};
|
||||
use futures::TryFutureExt;
|
||||
use metaconfig_types::UnodeVersion;
|
||||
use mononoke_types::{
|
||||
@ -93,11 +95,10 @@ pub struct RootUnodeManifestMapping {
|
||||
impl BlobstoreRootIdMapping for RootUnodeManifestMapping {
|
||||
type Value = RootUnodeManifestId;
|
||||
|
||||
fn new(repo: &BlobRepo) -> Result<Self> {
|
||||
let unode_version = repo.get_derived_data_config().unode_version;
|
||||
fn new(repo: &BlobRepo, config: &DerivedDataTypesConfig) -> Result<Self> {
|
||||
Ok(Self {
|
||||
blobstore: repo.get_blobstore(),
|
||||
unode_version,
|
||||
unode_version: config.unode_version,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ use context::CoreContext;
|
||||
use deleted_files_manifest::{RootDeletedManifestId, RootDeletedManifestMapping};
|
||||
use derived_data::{
|
||||
derive_impl::derive_impl, BlobstoreExistsMapping, BlobstoreRootIdMapping, BonsaiDerivable,
|
||||
BonsaiDerived, BonsaiDerivedMapping, DeriveError, DeriveMode, RegenerateMapping,
|
||||
BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig, RegenerateMapping,
|
||||
};
|
||||
use derived_data_filenodes::{FilenodesOnlyPublic, FilenodesOnlyPublicMapping};
|
||||
use fastlog::{RootFastlog, RootFastlogMapping};
|
||||
@ -170,13 +170,12 @@ pub trait DerivedUtils: Send + Sync + 'static {
|
||||
#[derive(Clone)]
|
||||
struct DerivedUtilsFromMapping<M> {
|
||||
mapping: RegenerateMapping<M>,
|
||||
mode: DeriveMode,
|
||||
}
|
||||
|
||||
impl<M> DerivedUtilsFromMapping<M> {
|
||||
fn new(mapping: M, mode: DeriveMode) -> Self {
|
||||
fn new(mapping: M) -> Self {
|
||||
let mapping = RegenerateMapping::new(mapping);
|
||||
Self { mapping, mode }
|
||||
Self { mapping }
|
||||
}
|
||||
}
|
||||
|
||||
@ -196,9 +195,9 @@ where
|
||||
// `self.mapping` there. This will allow us to
|
||||
// e.g. regenerate derived data for the commit
|
||||
// even if it was already generated (see RegenerateMapping call).
|
||||
cloned!(self.mapping, self.mode);
|
||||
cloned!(self.mapping);
|
||||
async move {
|
||||
let result = derive_impl::<M::Value, _>(&ctx, &repo, &mapping, csid, mode).await?;
|
||||
let result = derive_impl::<M::Value, _>(&ctx, &repo, &mapping, csid).await?;
|
||||
Ok(format!("{:?}", result))
|
||||
}
|
||||
.boxed()
|
||||
@ -242,14 +241,7 @@ where
|
||||
for csid in csids {
|
||||
// create new context so each derivation batch would have its own trace
|
||||
let ctx = CoreContext::new_with_logger(ctx.fb, ctx.logger().clone());
|
||||
derive_impl::<M::Value, _>(
|
||||
&ctx,
|
||||
&repo,
|
||||
&in_memory_mapping,
|
||||
csid,
|
||||
DeriveMode::Unsafe,
|
||||
)
|
||||
.await?;
|
||||
derive_impl::<M::Value, _>(&ctx, &repo, &in_memory_mapping, csid).await?;
|
||||
}
|
||||
|
||||
// flush blobstore
|
||||
@ -403,58 +395,76 @@ pub fn derived_data_utils(
|
||||
repo: &BlobRepo,
|
||||
name: impl AsRef<str>,
|
||||
) -> Result<Arc<dyn DerivedUtils>, Error> {
|
||||
derived_data_utils_impl(repo, name, DeriveMode::OnlyIfEnabled)
|
||||
let name = name.as_ref();
|
||||
let config = repo.get_derived_data_config();
|
||||
let types_config = if config.enabled.types.contains(name) {
|
||||
&config.enabled
|
||||
} else {
|
||||
return Err(anyhow!("Derived data type {} is not configured", name));
|
||||
};
|
||||
derived_data_utils_impl(repo, name, types_config)
|
||||
}
|
||||
|
||||
pub fn derived_data_utils_unsafe(
|
||||
pub fn derived_data_utils_for_backfill(
|
||||
repo: &BlobRepo,
|
||||
name: impl AsRef<str>,
|
||||
) -> Result<Arc<dyn DerivedUtils>, Error> {
|
||||
derived_data_utils_impl(repo, name, DeriveMode::Unsafe)
|
||||
let name = name.as_ref();
|
||||
let config = repo.get_derived_data_config();
|
||||
let types_config = if config.backfilling.types.contains(name) {
|
||||
&config.backfilling
|
||||
} else if config.enabled.types.contains(name) {
|
||||
&config.enabled
|
||||
} else {
|
||||
return Err(anyhow!(
|
||||
"Derived data type {} is not configured for backfilling",
|
||||
name
|
||||
));
|
||||
};
|
||||
derived_data_utils_impl(repo, name, types_config)
|
||||
}
|
||||
|
||||
fn derived_data_utils_impl(
|
||||
repo: &BlobRepo,
|
||||
name: impl AsRef<str>,
|
||||
mode: DeriveMode,
|
||||
name: &str,
|
||||
config: &DerivedDataTypesConfig,
|
||||
) -> Result<Arc<dyn DerivedUtils>, Error> {
|
||||
let repo = &repo;
|
||||
match name.as_ref() {
|
||||
match name {
|
||||
RootUnodeManifestId::NAME => {
|
||||
let mapping = RootUnodeManifestMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = RootUnodeManifestMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
RootFastlog::NAME => {
|
||||
let mapping = RootFastlogMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = RootFastlogMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
MappedHgChangesetId::NAME => {
|
||||
let mapping = HgChangesetIdMapping::new(repo);
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = HgChangesetIdMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
RootFsnodeId::NAME => {
|
||||
let mapping = RootFsnodeMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = RootFsnodeMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
BlameRoot::NAME => {
|
||||
let mapping = BlameRootMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = BlameRootMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
ChangesetInfo::NAME => {
|
||||
let mapping = ChangesetInfoMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = ChangesetInfoMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
RootDeletedManifestId::NAME => {
|
||||
let mapping = RootDeletedManifestMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = RootDeletedManifestMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
FilenodesOnlyPublic::NAME => {
|
||||
let mapping = FilenodesOnlyPublicMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = FilenodesOnlyPublicMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
RootSkeletonManifestId::NAME => {
|
||||
let mapping = RootSkeletonManifestMapping::new(repo)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping, mode)))
|
||||
let mapping = RootSkeletonManifestMapping::new(repo, config)?;
|
||||
Ok(Arc::new(DerivedUtilsFromMapping::new(mapping)))
|
||||
}
|
||||
name => Err(format_err!("Unsupported derived data type: {}", name)),
|
||||
}
|
||||
|
@ -20,7 +20,9 @@ use std::sync::Arc;
|
||||
|
||||
use blobrepo::BlobRepo;
|
||||
use blobstore::{Blobstore, Storable};
|
||||
use derived_data::{BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError};
|
||||
use derived_data::{
|
||||
BonsaiDerivable, BonsaiDerived, BonsaiDerivedMapping, DeriveError, DerivedDataTypesConfig,
|
||||
};
|
||||
use filestore::{self, FetchKey};
|
||||
use mononoke_types::{BonsaiChangeset, ChangesetId, MPath};
|
||||
|
||||
@ -33,7 +35,7 @@ pub struct TreeMapping {
|
||||
}
|
||||
|
||||
impl TreeMapping {
|
||||
pub fn new(blobstore: Arc<dyn Blobstore>) -> Self {
|
||||
pub fn new(blobstore: Arc<dyn Blobstore>, _config: &DerivedDataTypesConfig) -> Self {
|
||||
Self { blobstore }
|
||||
}
|
||||
|
||||
@ -113,7 +115,8 @@ impl BonsaiDerived for TreeHandle {
|
||||
_ctx: &CoreContext,
|
||||
repo: &BlobRepo,
|
||||
) -> Result<Self::DefaultMapping, DeriveError> {
|
||||
Ok(TreeMapping::new(repo.blobstore().boxed()))
|
||||
let config = derived_data::enabled_type_config(repo, Self::NAME)?;
|
||||
Ok(TreeMapping::new(repo.blobstore().boxed(), config))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -431,18 +431,18 @@ mod test {
|
||||
use ascii::AsciiString;
|
||||
use bookmarks_types::BookmarkName;
|
||||
use cached_config::TestSource;
|
||||
use maplit::{btreemap, btreeset, hashmap};
|
||||
use maplit::{btreemap, hashmap, hashset};
|
||||
use metaconfig_types::{
|
||||
BlobConfig, BlobstoreId, BookmarkParams, Bundle2ReplayParams, CacheWarmupParams,
|
||||
CommitSyncConfigVersion, CommitSyncDirection, DatabaseConfig,
|
||||
DefaultSmallToLargeCommitSyncPathAction, DerivedDataConfig, FilestoreParams, HookBypass,
|
||||
HookConfig, HookManagerParams, HookParams, InfinitepushNamespace, InfinitepushParams,
|
||||
LfsParams, LocalDatabaseConfig, MetadataDatabaseConfig, MultiplexId, MultiplexedStoreType,
|
||||
PushParams, PushrebaseFlags, PushrebaseParams, RemoteDatabaseConfig,
|
||||
RemoteMetadataDatabaseConfig, RepoClientKnobs, SegmentedChangelogConfig,
|
||||
ShardableRemoteDatabaseConfig, ShardedRemoteDatabaseConfig, SmallRepoCommitSyncConfig,
|
||||
SourceControlServiceMonitoring, SourceControlServiceParams, UnodeVersion,
|
||||
WireprotoLoggingConfig,
|
||||
DefaultSmallToLargeCommitSyncPathAction, DerivedDataConfig, DerivedDataTypesConfig,
|
||||
FilestoreParams, HookBypass, HookConfig, HookManagerParams, HookParams,
|
||||
InfinitepushNamespace, InfinitepushParams, LfsParams, LocalDatabaseConfig,
|
||||
MetadataDatabaseConfig, MultiplexId, MultiplexedStoreType, PushParams, PushrebaseFlags,
|
||||
PushrebaseParams, RemoteDatabaseConfig, RemoteMetadataDatabaseConfig, RepoClientKnobs,
|
||||
SegmentedChangelogConfig, ShardableRemoteDatabaseConfig, ShardedRemoteDatabaseConfig,
|
||||
SmallRepoCommitSyncConfig, SourceControlServiceMonitoring, SourceControlServiceParams,
|
||||
UnodeVersion, WireprotoLoggingConfig,
|
||||
};
|
||||
use mononoke_types::MPath;
|
||||
use nonzero_ext::nonzero;
|
||||
@ -719,12 +719,10 @@ mod test {
|
||||
all_hooks_bypassed=false
|
||||
bypassed_commits_scuba_table="commits_bypassed_hooks"
|
||||
|
||||
[derived_data_config]
|
||||
derived_data_types=["fsnodes"]
|
||||
override_blame_filesize_limit=101
|
||||
|
||||
[derived_data_config.raw_unode_version]
|
||||
unode_version_v2 = {}
|
||||
[derived_data_config.enabled]
|
||||
types = ["fsnodes", "unodes", "blame"]
|
||||
unode_version = 2
|
||||
blame_filesize_limit = 101
|
||||
|
||||
[storage.main.metadata.remote]
|
||||
primary = { db_address = "db_address" }
|
||||
@ -1025,10 +1023,17 @@ mod test {
|
||||
],
|
||||
}),
|
||||
derived_data_config: DerivedDataConfig {
|
||||
derived_data_types: btreeset![String::from("fsnodes")],
|
||||
enabled: DerivedDataTypesConfig {
|
||||
types: hashset! {
|
||||
String::from("fsnodes"),
|
||||
String::from("unodes"),
|
||||
String::from("blame"),
|
||||
},
|
||||
unode_version: UnodeVersion::V2,
|
||||
blame_filesize_limit: Some(101),
|
||||
},
|
||||
backfilling: DerivedDataTypesConfig::default(),
|
||||
scuba_table: None,
|
||||
unode_version: UnodeVersion::V2,
|
||||
override_blame_filesize_limit: Some(101),
|
||||
},
|
||||
hgsql_name: HgsqlName("fbsource".to_string()),
|
||||
hgsql_globalrevs_name: HgsqlGlobalrevsName("fbsource".to_string()),
|
||||
|
@ -16,7 +16,7 @@ pub trait Convert {
|
||||
/// Conversion target
|
||||
type Output;
|
||||
|
||||
/// Try to convert `self` into `Self::Outpuf`
|
||||
/// Try to convert `self` into `Self::Output`
|
||||
fn convert(self) -> Result<Self::Output>;
|
||||
}
|
||||
|
||||
|
@ -12,20 +12,20 @@ use anyhow::{anyhow, Context, Result};
|
||||
use bookmarks_types::BookmarkName;
|
||||
use metaconfig_types::{
|
||||
BookmarkOrRegex, BookmarkParams, Bundle2ReplayParams, CacheWarmupParams,
|
||||
CommitcloudBookmarksFillerMode, ComparableRegex, DerivedDataConfig, HookBypass, HookConfig,
|
||||
HookManagerParams, HookParams, InfinitepushNamespace, InfinitepushParams, LfsParams,
|
||||
PushParams, PushrebaseFlags, PushrebaseParams, RepoClientKnobs, SegmentedChangelogConfig,
|
||||
ServiceWriteRestrictions, SourceControlServiceMonitoring, SourceControlServiceParams,
|
||||
StorageConfig, UnodeVersion, WireprotoLoggingConfig,
|
||||
CommitcloudBookmarksFillerMode, ComparableRegex, DerivedDataConfig, DerivedDataTypesConfig,
|
||||
HookBypass, HookConfig, HookManagerParams, HookParams, InfinitepushNamespace,
|
||||
InfinitepushParams, LfsParams, PushParams, PushrebaseFlags, PushrebaseParams, RepoClientKnobs,
|
||||
SegmentedChangelogConfig, ServiceWriteRestrictions, SourceControlServiceMonitoring,
|
||||
SourceControlServiceParams, StorageConfig, UnodeVersion, WireprotoLoggingConfig,
|
||||
};
|
||||
use mononoke_types::{MPath, PrefixTrie};
|
||||
use regex::Regex;
|
||||
use repos::{
|
||||
RawBookmarkConfig, RawBundle2ReplayParams, RawCacheWarmupConfig, RawCommitcloudBookmarksFiller,
|
||||
RawDerivedDataConfig, RawHookConfig, RawHookManagerParams, RawInfinitepushParams, RawLfsParams,
|
||||
RawPushParams, RawPushrebaseParams, RawRepoClientKnobs, RawSegmentedChangelogConfig,
|
||||
RawServiceWriteRestrictions, RawSourceControlServiceMonitoring, RawSourceControlServiceParams,
|
||||
RawUnodeVersion, RawWireprotoLoggingConfig,
|
||||
RawDerivedDataConfig, RawDerivedDataTypesConfig, RawHookConfig, RawHookManagerParams,
|
||||
RawInfinitepushParams, RawLfsParams, RawPushParams, RawPushrebaseParams, RawRepoClientKnobs,
|
||||
RawSegmentedChangelogConfig, RawServiceWriteRestrictions, RawSourceControlServiceMonitoring,
|
||||
RawSourceControlServiceParams, RawWireprotoLoggingConfig,
|
||||
};
|
||||
|
||||
use crate::convert::Convert;
|
||||
@ -367,29 +367,34 @@ impl Convert for RawSourceControlServiceMonitoring {
|
||||
}
|
||||
}
|
||||
|
||||
impl Convert for RawDerivedDataTypesConfig {
|
||||
type Output = DerivedDataTypesConfig;
|
||||
|
||||
fn convert(self) -> Result<Self::Output> {
|
||||
let types = self.types.into_iter().collect();
|
||||
let unode_version = match self.unode_version {
|
||||
None => UnodeVersion::default(),
|
||||
Some(1) => UnodeVersion::V1,
|
||||
Some(2) => UnodeVersion::V2,
|
||||
Some(version) => return Err(anyhow!("unknown unode version {}", version)),
|
||||
};
|
||||
let blame_filesize_limit = self.blame_filesize_limit.map(|limit| limit as u64);
|
||||
Ok(DerivedDataTypesConfig {
|
||||
types,
|
||||
unode_version,
|
||||
blame_filesize_limit,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Convert for RawDerivedDataConfig {
|
||||
type Output = DerivedDataConfig;
|
||||
|
||||
fn convert(self) -> Result<Self::Output> {
|
||||
let unode_version = if let Some(unode_version) = self.raw_unode_version {
|
||||
match unode_version {
|
||||
RawUnodeVersion::unode_version_v1(_) => UnodeVersion::V1,
|
||||
RawUnodeVersion::unode_version_v2(_) => UnodeVersion::V2,
|
||||
RawUnodeVersion::UnknownField(field) => {
|
||||
return Err(anyhow!("unknown unode version {}", field));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
UnodeVersion::default()
|
||||
};
|
||||
|
||||
Ok(DerivedDataConfig {
|
||||
scuba_table: self.scuba_table,
|
||||
derived_data_types: self.derived_data_types.unwrap_or_default(),
|
||||
unode_version,
|
||||
override_blame_filesize_limit: self
|
||||
.override_blame_filesize_limit
|
||||
.map(|limit| limit as u64),
|
||||
enabled: self.enabled.convert()?.unwrap_or_default(),
|
||||
backfilling: self.backfilling.convert()?.unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
use anyhow::{anyhow, Error, Result};
|
||||
use std::{
|
||||
collections::{BTreeSet, HashMap, HashSet},
|
||||
collections::{HashMap, HashSet},
|
||||
fmt, mem,
|
||||
num::{NonZeroU64, NonZeroUsize},
|
||||
ops::Deref,
|
||||
@ -214,14 +214,33 @@ pub struct RepoClientKnobs {
|
||||
pub struct DerivedDataConfig {
|
||||
/// Name of scuba table where all derivation will be logged to
|
||||
pub scuba_table: Option<String>,
|
||||
/// Types of derived data that can be derived for this repo
|
||||
pub derived_data_types: BTreeSet<String>,
|
||||
/// What unode version should be used (defaults to V1)
|
||||
|
||||
/// Configuration for enabled derived data types.
|
||||
pub enabled: DerivedDataTypesConfig,
|
||||
|
||||
/// Configuration for backfilling derived data types.
|
||||
pub backfilling: DerivedDataTypesConfig,
|
||||
}
|
||||
|
||||
impl DerivedDataConfig {
|
||||
/// Returns whether the named derived data type is enabled.
|
||||
pub fn is_enabled(&self, name: &str) -> bool {
|
||||
self.enabled.types.contains(name)
|
||||
}
|
||||
}
|
||||
|
||||
/// Config for derived data types
|
||||
#[derive(Eq, Clone, Default, Debug, PartialEq)]
|
||||
pub struct DerivedDataTypesConfig {
|
||||
/// The configured types.
|
||||
pub types: HashSet<String>,
|
||||
|
||||
/// What unode version should be used. Default: V1.
|
||||
pub unode_version: UnodeVersion,
|
||||
|
||||
/// Override the file size limit for blame. Blame won't be derived for files which
|
||||
/// size is above the limit. NOTE: if `override_blame_filesize_limit` is None
|
||||
/// then a default limit will be used!
|
||||
pub override_blame_filesize_limit: Option<u64>,
|
||||
/// size is above the limit. Default: `blame::BLAME_FILESIZE_LIMIT`.
|
||||
pub blame_filesize_limit: Option<u64>,
|
||||
}
|
||||
|
||||
/// What type of unode derived data to generate
|
||||
|
@ -28,6 +28,7 @@ hook_manager_factory = { path = "../hooks/hook_manager_factory" }
|
||||
hooks = { path = "../hooks" }
|
||||
live_commit_sync_config = { path = "../commit_rewriting/live_commit_sync_config" }
|
||||
manifest = { path = "../manifest" }
|
||||
mercurial_derived_data = { path = "../derived_data/mercurial_derived_data" }
|
||||
mercurial_types = { path = "../mercurial/types" }
|
||||
metaconfig_parser = { path = "../metaconfig/parser" }
|
||||
metaconfig_types = { path = "../metaconfig/types" }
|
||||
|
@ -36,6 +36,7 @@ use hooks::HookManager;
|
||||
use itertools::Itertools;
|
||||
use live_commit_sync_config::TestLiveCommitSyncConfig;
|
||||
use live_commit_sync_config::{CfgrLiveCommitSyncConfig, LiveCommitSyncConfig};
|
||||
use mercurial_derived_data::MappedHgChangesetId;
|
||||
use mercurial_types::Globalrev;
|
||||
use metaconfig_types::{CommonConfig, RepoConfig};
|
||||
use metaconfig_types::{
|
||||
@ -717,15 +718,13 @@ impl RepoContext {
|
||||
pub fn derive_changeset_info_enabled(&self) -> bool {
|
||||
self.blob_repo()
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.contains(ChangesetInfo::NAME)
|
||||
.is_enabled(ChangesetInfo::NAME)
|
||||
}
|
||||
|
||||
pub fn derive_hgchangesets_enabled(&self) -> bool {
|
||||
self.blob_repo()
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.contains("hgchangesets")
|
||||
.is_enabled(MappedHgChangesetId::NAME)
|
||||
}
|
||||
|
||||
/// Look up a changeset specifier to find the canonical bonsai changeset
|
||||
|
@ -1213,8 +1213,7 @@ impl<'r> Bundle2Resolver<'r> {
|
||||
let has_skeleton_manifests = self
|
||||
.repo
|
||||
.get_derived_data_config()
|
||||
.derived_data_types
|
||||
.contains(RootSkeletonManifestId::NAME);
|
||||
.is_enabled(RootSkeletonManifestId::NAME);
|
||||
let skip_on_upload = tunables().get_skip_case_conflict_check_on_changeset_upload();
|
||||
// If casefolding checks are enabled we need to check on upload if
|
||||
// skeleton manifests are disabled for this repo, or if skipping
|
||||
|
@ -318,18 +318,16 @@ async fn derive_bonsais_single_repo(
|
||||
repo: &BlobRepo,
|
||||
bcs_ids: &[ChangesetId],
|
||||
) -> Result<(), Error> {
|
||||
let derived_data_types = &repo.get_derived_data_config().derived_data_types;
|
||||
let derived_data_types = &repo.get_derived_data_config().enabled.types;
|
||||
|
||||
let len = derived_data_types.len();
|
||||
let mut derived_utils = vec![];
|
||||
for ty in derived_data_types {
|
||||
let utils = derived_data_utils(repo, ty)?;
|
||||
derived_utils.push(utils);
|
||||
}
|
||||
let derived_utils: Vec<_> = derived_data_types
|
||||
.iter()
|
||||
.map(|ty| derived_data_utils(repo, ty))
|
||||
.collect::<Result<_, _>>()?;
|
||||
|
||||
stream::iter(derived_utils)
|
||||
.map(Ok)
|
||||
.try_for_each_concurrent(len, |derived_util| async move {
|
||||
.try_for_each_concurrent(derived_data_types.len(), |derived_util| async move {
|
||||
for csid in bcs_ids {
|
||||
derived_util
|
||||
.derive(ctx.clone(), repo.clone(), csid.clone())
|
||||
|
@ -74,7 +74,8 @@ mod tests {
|
||||
)?;
|
||||
repo = repo.dangerous_override(|mut derived_data_config: DerivedDataConfig| {
|
||||
derived_data_config
|
||||
.derived_data_types
|
||||
.enabled
|
||||
.types
|
||||
.remove(&TreeHandle::NAME.to_string());
|
||||
derived_data_config
|
||||
});
|
||||
@ -765,7 +766,7 @@ mod tests {
|
||||
repo: &BlobRepo,
|
||||
cs_ids: &[ChangesetId],
|
||||
) -> Result<()> {
|
||||
let derived_data_types = &repo.get_derived_data_config().derived_data_types;
|
||||
let derived_data_types = &repo.get_derived_data_config().enabled.types;
|
||||
|
||||
for derived_data_type in derived_data_types {
|
||||
let derived_utils = derived_data_utils(repo, derived_data_type)?;
|
||||
|
@ -26,7 +26,7 @@ use futures::{
|
||||
use futures_ext::{try_boxfuture, BoxFuture, FutureExt};
|
||||
use futures_old::{future, Future};
|
||||
use hook_manager_factory::make_hook_manager;
|
||||
use maplit::btreeset;
|
||||
use maplit::hashset;
|
||||
use mercurial_derived_data::MappedHgChangesetId;
|
||||
use metaconfig_types::{
|
||||
CensoredScubaParams, CommitSyncConfig, RepoClientKnobs, RepoConfig, WireprotoLoggingConfig,
|
||||
@ -282,7 +282,7 @@ pub fn repo_handlers(
|
||||
let mut warm_bookmarks_cache_builder =
|
||||
WarmBookmarksCacheBuilder::new(&ctx, &blobrepo);
|
||||
warm_bookmarks_cache_builder.add_derived_data_warmers(
|
||||
&btreeset! { MappedHgChangesetId::NAME.to_string() },
|
||||
&hashset! { MappedHgChangesetId::NAME.to_string() },
|
||||
)?;
|
||||
if warm_bookmark_cache_check_blobimport {
|
||||
let mutable_counters =
|
||||
|
@ -908,13 +908,13 @@ write_infinitepush_config "$reponame"
|
||||
|
||||
if [[ -n "${ENABLED_DERIVED_DATA:-}" ]]; then
|
||||
cat >> "repos/$reponame/server.toml" <<CONFIG
|
||||
[derived_data_config]
|
||||
derived_data_types = $ENABLED_DERIVED_DATA
|
||||
[derived_data_config.enabled]
|
||||
types = $ENABLED_DERIVED_DATA
|
||||
CONFIG
|
||||
else
|
||||
cat >> "repos/$reponame/server.toml" <<CONFIG
|
||||
[derived_data_config]
|
||||
derived_data_types=["blame", "changeset_info", "deleted_manifest", "fastlog", "filenodes", "fsnodes", "unodes", "hgchangesets", "skeleton_manifests"]
|
||||
[derived_data_config.enabled]
|
||||
types=["blame", "changeset_info", "deleted_manifest", "fastlog", "filenodes", "fsnodes", "unodes", "hgchangesets", "skeleton_manifests"]
|
||||
CONFIG
|
||||
fi
|
||||
|
||||
|
@ -801,7 +801,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_all_derived_data_types_supported() {
|
||||
// All types blobrepo can support
|
||||
let a = init_all_derived_data().derived_data_types;
|
||||
let a = init_all_derived_data().enabled.types;
|
||||
|
||||
// supported in graph
|
||||
let mut s = HashSet::new();
|
||||
|
@ -859,7 +859,7 @@ pub fn setup_common<'a>(
|
||||
// Only walk derived node types that the repo is configured to contain
|
||||
include_node_types.retain(|t| {
|
||||
if let Some(t) = t.derived_data_name() {
|
||||
config.derived_data_config.derived_data_types.contains(t)
|
||||
config.derived_data_config.is_enabled(t)
|
||||
} else {
|
||||
true
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user