sapling/eden/mononoke/hook_tailer/main.rs


/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#![deny(warnings)]
pub mod tailer;
use anyhow::{format_err, Error, Result};
use blobrepo::BlobRepo;
use blobrepo_factory::BlobrepoBuilder;
use bookmarks::BookmarkName;
use clap::{App, Arg, ArgMatches};
use cmdlib::helpers::{block_execute, csid_resolve};
use context::CoreContext;
use fbinit::FacebookInit;
use futures::{
compat::Future01CompatExt,
future,
stream::{FuturesUnordered, StreamExt, TryStreamExt},
};
use mononoke_types::ChangesetId;
use slog::{debug, info, Logger};
use std::collections::HashSet;
use std::time::Duration;
use time_ext::DurationExt;
use tokio::{
fs::{File, OpenOptions},
io::{AsyncBufReadExt, AsyncWriteExt, BufReader},
};
use tailer::{HookExecutionInstance, Tailer};
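/// Collect changeset IDs supplied inline via `inline_arg` and/or listed one per
/// line in the file named by `file_arg`, then resolve each of them to a
/// `ChangesetId` in the given repo.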
async fn get_changesets<'a>(
matches: &'a ArgMatches<'a>,
inline_arg: &str,
file_arg: &str,
ctx: &CoreContext,
repo: &BlobRepo,
) -> Result<HashSet<ChangesetId>> {
let mut ids = matches
.values_of(inline_arg)
.map(|matches| matches.map(|cs| cs.to_string()).collect())
.unwrap_or_else(|| vec![]);
if let Some(path) = matches.value_of(file_arg) {
let file = File::open(path).await?;
let mut lines = BufReader::new(file).lines();
while let Some(line) = lines.next().await {
ids.push(line?);
}
}
let ret = ids
.into_iter()
.map(|cs| csid_resolve(ctx.clone(), repo.clone(), cs).compat())
.collect::<FuturesUnordered<_>>()
.try_collect()
.await?;
Ok(ret)
}
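/// Entry point: parse command-line arguments, set up logging and a
/// `CoreContext`, then drive the tailer to completion via `block_execute`.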
#[fbinit::main]
fn main(fb: FacebookInit) -> Result<()> {
let matches = setup_app().get_matches();
let (repo_name, config) = cmdlib::args::get_config(fb, &matches)?;
let logger = cmdlib::args::init_logging(fb, &matches);
info!(logger, "Hook tailer is starting");
let ctx = CoreContext::new_with_logger(fb, logger.clone());
block_execute(
run_hook_tailer(fb, &ctx, &config, &repo_name, &matches, &logger),
fb,
"hook_tailer",
&logger,
&matches,
cmdlib::monitoring::AliveService,
)
}
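/// Open the repo, build a `Tailer` for the requested bookmark, run hooks over
/// the selected changesets, and report per-changeset and aggregate results.
/// Returns an error if any changeset was rejected by a hook.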
async fn run_hook_tailer<'a>(
fb: FacebookInit,
ctx: &CoreContext,
config: &metaconfig_types::RepoConfig,
repo_name: &str,
matches: &'a ArgMatches<'a>,
logger: &Logger,
) -> Result<(), Error> {
let bookmark_name = matches.value_of("bookmark").unwrap();
let bookmark = BookmarkName::new(bookmark_name)?;
let common_config = cmdlib::args::load_common_config(fb, &matches)?;
let limit = cmdlib::args::get_usize(&matches, "limit", 1000);
let concurrency = cmdlib::args::get_usize(&matches, "concurrency", 100);
let stats_file = matches.value_of("stats-file");
let mut stats_file = match stats_file {
Some(stats_file) => {
let mut stats_file = OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(stats_file)
.await?;
let header = "Changeset ID,File Count,Outcomes,Completion Time us,Poll Time us\n";
stats_file.write_all(header.as_ref()).await?;
Some(stats_file)
}
None => None,
};
let disabled_hooks = cmdlib::args::parse_disabled_hooks_no_repo_prefix(&matches, &logger);
let caching = cmdlib::args::init_cachelib(fb, &matches, None);
let readonly_storage = cmdlib::args::parse_readonly_storage(&matches);
let builder = BlobrepoBuilder::new(
fb,
repo_name.into(),
&config,
cmdlib::args::parse_mysql_options(&matches),
caching,
common_config.scuba_censored_table,
readonly_storage,
cmdlib::args::parse_blobstore_options(&matches),
&logger,
);
let blobrepo = builder.build().await?;
let (exclusions, inclusions) = future::try_join(
get_changesets(matches, "exclude", "exclude_file", &ctx, &blobrepo),
get_changesets(matches, "changeset", "changeset_file", &ctx, &blobrepo),
)
.await?;
let tail = &Tailer::new(
ctx.clone(),
blobrepo.clone(),
config.clone(),
bookmark,
concurrency,
exclusions,
&disabled_hooks,
)
.await?;
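// Tail the bookmark up to `limit` changesets, unless specific changesets were
// requested, in which case run hooks only for those.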
let mut stream = if inclusions.is_empty() {
tail.run_with_limit(limit).boxed()
} else {
tail.run_changesets(inclusions).boxed()
};
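// Drain the stream, logging each instance into the summary and, if enabled,
// appending one CSV row per changeset to the stats file.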
let mut summary = HookExecutionSummary::default();
info!(logger, "==== Hooks results ====");
while let Some(instance) = stream.next().await {
let instance = instance?;
if let Some(ref mut stats_file) = stats_file {
let line = format!(
"{},{},{},{},{}\n",
instance.cs_id,
instance.file_count,
instance.outcomes.len(),
instance.stats.completion_time.as_micros_unchecked(),
instance.stats.poll_time.as_micros_unchecked(),
);
stats_file.write_all(line.as_ref()).await?;
}
summary.add_instance(&instance, &logger);
}
info!(logger, "==== Hooks stats ====");
info!(
logger,
"Completion time: {}us",
summary.completion_time.as_micros_unchecked()
);
info!(
logger,
"Poll time: {}us",
summary.poll_time.as_micros_unchecked()
);
info!(logger, "Changesets accepted: {}", summary.accepted);
info!(logger, "Changesets rejected: {}", summary.rejected);
if summary.rejected > 0 {
return Err(format_err!("Hook rejections: {}", summary.rejected));
}
Ok(())
}
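/// Aggregate counters and timings accumulated across all hook execution instances.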
#[derive(Default)]
struct HookExecutionSummary {
accepted: u64,
rejected: u64,
completion_time: Duration,
poll_time: Duration,
}
impl HookExecutionSummary {
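/// Record a single changeset's hook outcomes: rejections are logged at info
/// level and acceptances at debug level, and timing stats are accumulated.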
pub fn add_instance(&mut self, instance: &HookExecutionInstance, logger: &Logger) {
let mut is_rejected = false;
for outcome in instance.outcomes.iter() {
if outcome.is_rejection() {
is_rejected = true;
info!(logger, "{}", outcome);
} else {
debug!(logger, "{}", outcome);
}
}
if is_rejected {
self.rejected += 1;
} else {
self.accepted += 1;
}
self.completion_time += instance.stats.completion_time;
self.poll_time += instance.stats.poll_time;
}
}
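/// Build the clap argument parser for the hook tailer, including the shared
/// disabled-hooks arguments from cmdlib.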
fn setup_app<'a, 'b>() -> App<'a, 'b> {
let app = cmdlib::args::MononokeApp::new("run hooks against repo")
.with_advanced_args_hidden()
.build()
.version("0.0.0")
.arg(
Arg::with_name("bookmark")
.long("bookmark")
.short("B")
.help("bookmark to tail")
.takes_value(true)
.required(true),
)
.arg(
Arg::with_name("concurrency")
.long("concurrency")
.help("the number of changesets to run hooks for in parallel")
.takes_value(true),
)
.arg(
Arg::with_name("changeset")
.long("changeset")
.short("c")
.multiple(true)
.help("the changeset to run hooks for")
.takes_value(true),
)
.arg(
Arg::with_name("changeset_file")
.long("changeset_file")
.help("a file containing chnagesets to explicitly run hooks for")
.takes_value(true),
)
.arg(
Arg::with_name("exclude")
.long("exclude")
.short("e")
.multiple(true)
.help("the changesets to exclude")
.takes_value(true),
)
.arg(
Arg::with_name("exclude_file")
.long("exclude_file")
.short("f")
.help("a file containing changesets to exclude that is separated by new lines")
.takes_value(true),
)
.arg(
Arg::with_name("limit")
.long("limit")
.takes_value(true)
.help("limit number of commits to process (non-continuous only). Default: 1000"),
)
.arg(
Arg::with_name("stats-file")
.long("stats-file")
.takes_value(true)
.help("Log hook execution statistics to a file (CSV format)"),
);
cmdlib::args::add_disabled_hooks_args(app)
}