mononoke: fix benchmark after migration to derived data manager

Summary:
The derived data manager no longer allows deriving a batch of commits if their
ancestors haven't been derived yet (and that's a reasonable check to have).

However, this broke the benchmark when the --batch-size, --backfill and
--parallel options are set: at the very beginning of the function we marked all
commits as not derived, so when we start deriving the second batch the first
batch still looks underived, which trips the derived data manager's check.

Let's instead mark only the commits we are about to derive as not derived, and
clear that mark once the batch is done.
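
In other words, the "pretend these commits aren't derived" marker is now scoped
to the batch currently being derived. A minimal, self-contained sketch of that
pattern; the RegenerateMarker type and u64 changeset ids below are toy stand-ins
for Mononoke's RegenerateMapping / DerivedUtils and ChangesetId, and the
derivation step is only a comment where backfill_batch_dangerous would run:

use std::collections::HashSet;
use std::sync::Mutex;

// Toy stand-in for ChangesetId.
type ChangesetId = u64;

// Toy stand-in for the regenerate bookkeeping in RegenerateMapping.
#[derive(Default)]
struct RegenerateMarker {
    // Commits that should be treated as "not derived yet".
    marked: Mutex<HashSet<ChangesetId>>,
}

impl RegenerateMarker {
    fn regenerate(&self, csids: &[ChangesetId]) {
        self.marked.lock().unwrap().extend(csids.iter().copied());
    }

    fn clear_regenerate(&self) {
        self.marked.lock().unwrap().clear();
    }

    fn is_marked(&self, csid: &ChangesetId) -> bool {
        self.marked.lock().unwrap().contains(csid)
    }
}

fn main() {
    let marker = RegenerateMarker::default();
    let csids: Vec<ChangesetId> = (1..=6).collect();
    let batch_size = 2;

    for chunk in csids.chunks(batch_size) {
        // Mark only this chunk as "not derived" before deriving it...
        marker.regenerate(chunk);

        // ...derive the chunk here (backfill_batch_dangerous in the real code)...

        // ...and clear the mark, so the ancestor check for the next chunk
        // sees this chunk as already derived.
        marker.clear_regenerate();
        assert!(!marker.is_marked(&chunk[0]));
    }
}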

Reviewed By: mitrandir77

Differential Revision: D31140464

fbshipit-source-id: fc74d58dc3c4a3ad70e8e2527f7d6dfc8fde8a9c
Stanislau Hlebik 2021-09-23 09:56:44 -07:00 committed by Facebook GitHub Bot
parent 1190e2247e
commit 62485aafcb
3 changed files with 23 additions and 1 deletions


@@ -129,7 +129,6 @@ pub async fn regenerate_derived_data(
));
}
let csids = topo_sort(&ctx, &repo, csids).await?;
derived_utils.regenerate(&csids);
match opts {
DeriveOptions::Simple => derive_simple(&ctx, &repo, csids, derived_utils).await,
@@ -146,6 +145,7 @@ async fn derive_simple(
csids: Vec<ChangesetId>,
derived_data_utils: Arc<dyn DerivedUtils>,
) -> Result<BenchmarkResult, Error> {
derived_data_utils.regenerate(&csids);
let (stats, per_commit_stats) = async {
let mut per_commit_stats = vec![];
for csid in csids {
@@ -172,6 +172,7 @@ async fn derive_with_backfill(
csids: Vec<ChangesetId>,
derived_data_utils: Arc<dyn DerivedUtils>,
) -> Result<BenchmarkResult, Error> {
derived_data_utils.regenerate(&csids);
let (stats, backfill_derive_stats) = derived_data_utils
.backfill_batch_dangerous(
ctx.clone(),
@@ -209,6 +210,7 @@ async fn derive_with_parallel_backfill(
let (stats, ()) = async {
for chunk in csids.chunks(batch_size as usize) {
derived_data_utils.regenerate(&chunk.to_vec());
derived_data_utils
.backfill_batch_dangerous(
ctx.clone(),
@@ -218,6 +220,7 @@ async fn derive_with_parallel_backfill(
None,
)
.await?;
derived_data_utils.clear_regenerate();
}
Result::<_, Error>::Ok(())
}


@@ -116,6 +116,10 @@ impl<M> RegenerateMapping<M> {
pub fn regenerate<I: IntoIterator<Item = ChangesetId>>(&self, csids: I) {
self.regenerate.with(|regenerate| regenerate.extend(csids))
}
pub fn clear_regenerate(&self) {
self.regenerate.with(|regenerate| regenerate.clear());
}
}
#[async_trait]


@@ -181,6 +181,9 @@ pub trait DerivedUtils: Send + Sync + 'static {
/// Regenerate derived data for specified set of commits
fn regenerate(&self, csids: &Vec<ChangesetId>);
/// Remove all previously set regenerations
fn clear_regenerate(&self);
/// Get a name for this type of derived data
fn name(&self) -> &'static str;
@@ -399,6 +402,10 @@ where
self.orig_mapping.regenerate(csids.iter().copied())
}
fn clear_regenerate(&self) {
self.orig_mapping.clear_regenerate()
}
fn name(&self) -> &'static str {
Derivable::NAME
}
@@ -604,6 +611,10 @@ where
.with(|rederive| rederive.extend(csids.iter().copied()));
}
fn clear_regenerate(&self) {
self.rederive.with(|rederive| rederive.clear());
}
fn name(&self) -> &'static str {
Derivable::NAME
}
@@ -1402,6 +1413,10 @@ mod tests {
unimplemented!()
}
fn clear_regenerate(&self) {
unimplemented!()
}
fn name(&self) -> &'static str {
self.deriver.name()
}