tests: add test for derived data tailer

Reviewed By: krallin

Differential Revision: D25562158

fbshipit-source-id: 2ff917c4ae2f7c4b273b91d3f742bab6b05d8b46
Mark Juggurnauth-Thomas 2020-12-16 10:36:27 -08:00 committed by Facebook GitHub Bot
parent 5eaf0cfde2
commit 91f2d07dbe
5 changed files with 189 additions and 7 deletions

View File

@ -16,7 +16,9 @@ use cmdlib::{
};
use context::CoreContext;
use derived_data::{BonsaiDerivable, BonsaiDerived};
use derived_data_utils::{derived_data_utils, POSSIBLE_DERIVED_TYPES};
use derived_data_utils::{
derived_data_utils, derived_data_utils_for_backfill, POSSIBLE_DERIVED_TYPES,
};
use fbinit::FacebookInit;
use fsnodes::RootFsnodeId;
use futures::{
@ -44,6 +46,7 @@ const SUBCOMMAND_VERIFY_MANIFESTS: &str = "verify-manifests";
const ARG_HASH_OR_BOOKMARK: &str = "hash-or-bookmark";
const ARG_TYPE: &str = "type";
const ARG_IF_DERIVED: &str = "if-derived";
const ARG_BACKFILL: &str = "backfill";
const MANIFEST_DERIVED_DATA_TYPES: &'static [&'static str] = &[
RootFsnodeId::NAME,
@ -58,6 +61,11 @@ pub fn build_subcommand<'a, 'b>() -> App<'a, 'b> {
.subcommand(
SubCommand::with_name(SUBCOMMAND_EXISTS)
.about("check if derived data has been generated")
.arg(
Arg::with_name(ARG_BACKFILL)
.long("backfill")
.help("use backfilling config rather than enabled config"),
)
.arg(
Arg::with_name(ARG_TYPE)
.help("type of derived data")
@ -120,7 +128,10 @@ pub async fn subcommand_derived_data<'a>(
.map(|m| m.to_string())
.unwrap();
check_derived_data_exists(ctx, repo, derived_data_type, hashes_or_bookmarks).await
let backfill = arg_matches.is_present(ARG_BACKFILL);
check_derived_data_exists(ctx, repo, derived_data_type, hashes_or_bookmarks, backfill)
.await
}
(SUBCOMMAND_VERIFY_MANIFESTS, Some(arg_matches)) => {
let hash_or_bookmark = arg_matches
@ -158,8 +169,13 @@ async fn check_derived_data_exists(
repo: BlobRepo,
derived_data_type: String,
hashes_or_bookmarks: Vec<String>,
backfill: bool,
) -> Result<(), SubcommandError> {
let derived_utils = derived_data_utils(&repo, derived_data_type)?;
let derived_utils = if backfill {
derived_data_utils_for_backfill(&repo, derived_data_type)?
} else {
derived_data_utils(&repo, derived_data_type)?
};
let cs_id_futs: Vec<_> = hashes_or_bookmarks
.into_iter()
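
For context, a minimal sketch of the selection the new --backfill flag performs (struct and field names below are illustrative stand-ins, not the real Mononoke types): a repo carries an enabled derived-data config plus an optional backfilling override, and the flag makes the exists check consult the latter.

use std::collections::HashSet;

// Illustrative stand-ins for the repo's derived data configuration.
struct TypesConfig {
    types: HashSet<&'static str>,
}

struct DerivedDataConfig {
    enabled: TypesConfig,
    backfilling: TypesConfig,
}

// Mirrors the branch added above: use the backfilling config when
// --backfill is present, the enabled config otherwise.
fn config_for(config: &DerivedDataConfig, backfill: bool) -> &TypesConfig {
    if backfill {
        &config.backfilling
    } else {
        &config.enabled
    }
}

fn main() {
    let config = DerivedDataConfig {
        enabled: TypesConfig {
            types: ["hgchangesets", "filenodes", "unodes", "fsnodes"]
                .iter()
                .copied()
                .collect(),
        },
        backfilling: TypesConfig {
            types: ["blame", "skeleton_manifests"].iter().copied().collect(),
        },
    };
    // Types that are only being backfilled resolve only with --backfill.
    assert!(config_for(&config, true).types.contains("blame"));
    assert!(!config_for(&config, false).types.contains("blame"));
}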

View File

@ -39,6 +39,10 @@ pub const SKIPLIST: &str = "skiplist";
const SKIPLIST_BUILD: &str = "build";
const ARG_SPARSE: &str = "sparse";
const SKIPLIST_READ: &str = "read";
const ARG_EXPONENT: &str = "exponent";
// skiplist will jump up to 2^9 changesets
const DEFAULT_SKIPLIST_EXPONENT_STR: &str = "9";
pub fn build_subcommand<'a, 'b>() -> App<'a, 'b> {
SubCommand::with_name(SKIPLIST)
@ -61,6 +65,12 @@ pub fn build_subcommand<'a, 'b>() -> App<'a, 'b> {
Arg::with_name(ARG_SPARSE)
.long(ARG_SPARSE)
.help("EXPERIMENTAL: build sparse skiplist. Makes skiplist smaller"),
)
.arg(
Arg::with_name(ARG_EXPONENT)
.long(ARG_EXPONENT)
.default_value(DEFAULT_SKIPLIST_EXPONENT_STR)
.help("Skiplist will skip up to 2^EXPONENT commits"),
),
)
.subcommand(
@ -105,6 +115,11 @@ pub async fn subcommand_skiplist<'a>(
.to_string();
let rebuild = sub_m.is_present("rebuild");
let skiplist_ty = SkiplistType::new(sub_m.is_present(ARG_SPARSE));
let exponent = sub_m
.value_of(ARG_EXPONENT)
.expect("exponent must be set")
.parse::<u32>()
.map_err(Error::from)?;
args::init_cachelib(fb, &matches);
let config_store = args::init_config_store(fb, &logger, matches)?;
@ -120,6 +135,7 @@ pub async fn subcommand_skiplist<'a>(
&sql_changesets,
rebuild,
skiplist_ty,
exponent,
)
.await
.map_err(SubcommandError::Error)
@ -162,10 +178,11 @@ async fn build_skiplist_index<'a, S: ToString>(
sql_changesets: &'a SqlChangesets,
force_full_rebuild: bool,
skiplist_ty: SkiplistType,
exponent: u32,
) -> Result<(), Error> {
let blobstore = repo.get_blobstore();
// skiplist will jump up to 2^9 changesets
let skiplist_depth = 10;
// Depth must be one more than the maximum exponent.
let skiplist_depth = exponent + 1;
// Index all changesets
let max_index_depth = 20000000000;
let key = key.to_string();
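
A quick worked example of the exponent/depth relationship the new flag exposes, consistent with the comments above: with the default exponent of 9 the skiplist jumps up to 2^9 = 512 changesets, and the index is built one level deeper than the largest jump.

// A depth-d skiplist carries edges of lengths 2^0 .. 2^(d-1), so the
// longest jump is 2^exponent changesets and depth = exponent + 1.
fn skiplist_depth(exponent: u32) -> u32 {
    exponent + 1
}

fn max_jump(exponent: u32) -> u64 {
    1u64 << exponent
}

fn main() {
    assert_eq!(skiplist_depth(9), 10); // the old hard-coded depth
    assert_eq!(max_jump(9), 512);
    assert_eq!(skiplist_depth(2), 3); // --exponent 2, as used in the test below
    assert_eq!(max_jump(2), 4);
}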

View File

@ -72,6 +72,7 @@ const ARG_REGENERATE: &str = "regenerate";
const ARG_PREFETCHED_COMMITS_PATH: &str = "prefetched-commits-path";
const ARG_CHANGESET: &str = "changeset";
const ARG_USE_SHARED_LEASES: &str = "use-shared-leases";
const ARG_STOP_ON_IDLE: &str = "stop-on-idle";
const ARG_BATCHED: &str = "batched";
const ARG_BATCH_SIZE: &str = "batch-size";
const ARG_PARALLEL: &str = "parallel";
@ -208,6 +209,11 @@ fn main(fb: FacebookInit) -> Result<()> {
"is obtained.",
)),
)
.arg(
Arg::with_name(ARG_STOP_ON_IDLE)
.long(ARG_STOP_ON_IDLE)
.help("Stop tailing or backfilling when there is nothing left"),
)
.arg(
Arg::with_name(ARG_BATCHED)
.long(ARG_BATCHED)
@ -474,6 +480,7 @@ async fn run_subcmd<'a>(
let config_store = args::init_config_store(fb, logger, matches)?;
let (_, config) = args::get_config_by_repoid(config_store, matches, repo.get_repoid())?;
let use_shared_leases = sub_m.is_present(ARG_USE_SHARED_LEASES);
let stop_on_idle = sub_m.is_present(ARG_STOP_ON_IDLE);
let batched = sub_m.is_present(ARG_BATCHED);
let parallel = sub_m.is_present(ARG_PARALLEL);
let batch_size = if batched {
@ -502,6 +509,7 @@ async fn run_subcmd<'a>(
repo,
config,
use_shared_leases,
stop_on_idle,
batch_size,
parallel,
backfill,
@ -748,6 +756,7 @@ async fn subcommand_tail(
repo: BlobRepo,
config: RepoConfig,
use_shared_leases: bool,
stop_on_idle: bool,
batch_size: Option<usize>,
parallel: bool,
mut backfill: bool,
@ -845,11 +854,19 @@ async fn subcommand_tail(
Ok(heads) => heads.into_iter().collect::<HashSet<_>>(),
Err(e) => return Err::<(), _>(e),
};
let underived_heads = heads
.difference(&derived_heads)
.cloned()
.collect::<Vec<_>>();
if stop_on_idle && underived_heads.is_empty() {
info!(ctx.logger(), "tail stopping due to --stop-on-idle");
return Ok(());
}
tail_batch_iteration(
&ctx,
&repo,
&tail_derivers,
heads.difference(&derived_heads).cloned().collect(),
underived_heads,
batch_size,
parallel,
)
@ -867,12 +884,16 @@ async fn subcommand_tail(
if backfill {
let mut derived_heads = HashSet::new();
while let Some(heads) = receiver.recv().await {
let underived_heads = heads
.difference(&derived_heads)
.cloned()
.collect::<Vec<_>>();
backfill_heads(
&ctx,
&repo,
skiplist_index.as_deref(),
&backfill_derivers,
heads.difference(&derived_heads).cloned().collect(),
underived_heads,
slice_size,
batch_size,
parallel,
@ -880,6 +901,7 @@ async fn subcommand_tail(
.await?;
derived_heads = heads;
}
info!(ctx.logger(), "backfill stopping");
}
Ok::<_, Error>(())
})
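
A rough synchronous sketch of the loop shape --stop-on-idle changes (the real tailer is async and derives data between passes; fetch_heads and the String ids below are stand-ins): each pass computes the heads not yet derived, and the new flag turns "nothing left to do" into a clean exit instead of another poll.

use std::collections::HashSet;

// Stand-in for reading the current bookmark heads from the repo.
fn fetch_heads() -> HashSet<String> {
    HashSet::new()
}

fn tail(stop_on_idle: bool) {
    let mut derived_heads: HashSet<String> = HashSet::new();
    loop {
        let heads = fetch_heads();
        // The same set difference as above: only underived heads need work.
        let underived_heads: Vec<String> =
            heads.difference(&derived_heads).cloned().collect();
        if stop_on_idle && underived_heads.is_empty() {
            // Corresponds to the "tail stopping due to --stop-on-idle" log.
            return;
        }
        // ... derive data for underived_heads in batches ...
        derived_heads = heads;
        // Without the flag the tailer would poll again, running forever.
    }
}

fn main() {
    tail(true); // exits immediately: no heads means nothing is underived
}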

View File

@ -732,6 +732,12 @@ warm_bookmark_cache_check_blobimport=true
CONFIG
fi
if [[ -n "${SKIPLIST_INDEX_BLOBSTORE_KEY:-}" ]]; then
cat >> "repos/$reponame/server.toml" <<CONFIG
skiplist_index_blobstore_key="$SKIPLIST_INDEX_BLOBSTORE_KEY"
CONFIG
fi
# Normally point to common storageconfig, but if none passed, create per-repo
if [[ -z "$storageconfig" ]]; then
storageconfig="blobstore_$reponame"

View File

@ -0,0 +1,121 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License found in the LICENSE file in the root
# directory of this source tree.
$ . "${TEST_FIXTURES}/library.sh"
setup configuration
$ ENABLED_DERIVED_DATA='["hgchangesets", "filenodes"]' setup_common_config
$ cd "$TESTTMP"
$ hg init repo-hg
$ cd repo-hg
$ setup_hg_server
$ drawdag <<EOS
> M
> |
> L
> |
> K
> |
> J Q
> | |
> I P
> | |
> H O
> | |
> G N
> |/
> F
> |
> E
> |
> D
> |
> C
> |
> B
> |
> A
> EOS
$ hg bookmark main -r $M
$ hg bookmark other -r $Q
$ cd "$TESTTMP"
$ blobimport repo-hg/.hg repo
build the skiplist that will be used to slice the repository. with
--exponent 2 the skiplist jumps at most 2^2 = 4 commits.
$ mononoke_admin skiplist build skiplist_4 --exponent 2
*] using repo "repo" repoid RepositoryId(0) (glob)
*] creating a skiplist from scratch (glob)
*] build 17 skiplist nodes (glob)
enable some more derived data types for normal usage and backfilling
$ SKIPLIST_INDEX_BLOBSTORE_KEY=skiplist_4 \
> ENABLED_DERIVED_DATA='["hgchangesets", "filenodes", "unodes", "fsnodes"]' \
> setup_mononoke_config
$ cd "$TESTTMP"
$ cat >> mononoke-config/repos/repo/server.toml <<CONFIG
> [derived_data_config.backfilling]
> types=["blame", "skeleton_manifests"]
> CONFIG
start the tailer, tailing and backfilling different sets of types.
normally the tailer runs forever, but for this test we make it stop
once it becomes idle.
$ backfill_derived_data tail --stop-on-idle --backfill --batched --parallel --sliced --slice-size=4
*] using repo "repo" repoid RepositoryId(0) (glob)
*] tailing derived data: {"filenodes", "fsnodes", "hgchangesets", "unodes"} (glob)
*] backfilling derived data: {"blame", "filenodes", "fsnodes", "hgchangesets", "skeleton_manifests", "unodes"} (glob)
*] Fetching and initializing skiplist (glob)
*] cmap size 17, parent nodecount 0, skip nodecount 16, maxsedgelen 1, maxpedgelen 0 (glob)
*] Built skiplist (glob)
*] using batched deriver (glob)
*] found changesets: 17 * (glob)
*] deriving data 34 (glob)
*] count:17 time:* start:* end:* (glob)
*] count:17 time:* start:* end:* (glob)
*] tail stopping due to --stop-on-idle (glob)
*] Adding slice starting at generation 12 with 1 heads (1 slices queued) (glob)
*] Adding slice starting at generation 8 with 2 heads (0 slices queued) (glob)
*] Adding slice starting at generation 4 with 2 heads (0 slices queued) (glob)
*] Adding slice starting at generation 0 with 2 heads (0 slices queued) (glob)
*] Repository sliced into 4 slices requiring derivation (glob)
*] Deriving slice 0 (1/4) with 2 heads (glob)
*] found changesets: 3 * (glob)
*] deriving data 4 (glob)
*] count:2 time:* start:* end:* (glob)
*] count:2 time:* start:* end:* (glob)
*] Deriving slice 4 (2/4) with 2 heads (glob)
*] found changesets: 5 * (glob)
*] deriving data 8 (glob)
*] count:4 time:* start:* end:* (glob)
*] count:4 time:* start:* end:* (glob)
*] Deriving slice 8 (3/4) with 2 heads (glob)
*] found changesets: 7 * (glob)
*] deriving data 14 (glob)
*] count:7 time:* start:* end:* (glob)
*] count:7 time:* start:* end:* (glob)
*] Deriving slice 12 (4/4) with 1 heads (glob)
*] found changesets: 4 * (glob)
*] deriving data 8 (glob)
*] count:4 time:* start:* end:* (glob)
*] count:4 time:* start:* end:* (glob)
*] backfill stopping (glob)
$ mononoke_admin --log-level ERROR derived-data exists fsnodes main
Derived: 544c0991ef12b0621aa901dd64ef65f539246646faa940171850f5e11c84cda7
$ mononoke_admin --log-level ERROR derived-data exists fsnodes other
Derived: 39f5c6f537a8c1157a7f92a39bb036f58c03269fbe244cccaf6489fd26813467
$ mononoke_admin --log-level ERROR derived-data exists unodes main
Derived: 544c0991ef12b0621aa901dd64ef65f539246646faa940171850f5e11c84cda7
$ mononoke_admin --log-level ERROR derived-data exists unodes other
Derived: 39f5c6f537a8c1157a7f92a39bb036f58c03269fbe244cccaf6489fd26813467
$ mononoke_admin --log-level ERROR derived-data exists --backfill blame main
Derived: 544c0991ef12b0621aa901dd64ef65f539246646faa940171850f5e11c84cda7
$ mononoke_admin --log-level ERROR derived-data exists --backfill blame other
Derived: 39f5c6f537a8c1157a7f92a39bb036f58c03269fbe244cccaf6489fd26813467
$ mononoke_admin --log-level ERROR derived-data exists --backfill skeleton_manifests other
Derived: 39f5c6f537a8c1157a7f92a39bb036f58c03269fbe244cccaf6489fd26813467
$ mononoke_admin --log-level ERROR derived-data exists --backfill skeleton_manifests main
Derived: 544c0991ef12b0621aa901dd64ef65f539246646faa940171850f5e11c84cda7