// eden/mononoke/hook_tailer/main.rs
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#![deny(warnings)]
#![feature(never_type)]
pub mod tailer;
use anyhow::{format_err, Error, Result};
use blobrepo_factory::BlobrepoBuilder;
use bookmarks::BookmarkName;
use clap::{App, Arg, ArgMatches};
use cmdlib::helpers::block_execute;
use context::CoreContext;
use fbinit::FacebookInit;
use futures::{
compat::Future01CompatExt,
future::FutureExt,
stream::{self, StreamExt, TryStreamExt},
};
use futures_ext::BoxFuture;
use futures_old::Future as OldFuture;
use hooks::HookOutcome;
use manifold::{ManifoldHttpClient, RequestContext};
use mercurial_types::{HgChangesetId, HgNodeHash};
use slog::{debug, info, o, Drain, Level, Logger};
use slog_glog_fmt::{kv_categorizer, kv_defaults, GlogFormat};
use std::fmt;
use std::fs::File;
use std::io;
use std::io::{BufRead, BufReader};
use std::str::FromStr;
use std::time::Duration;
use tailer::Tailer;
use thiserror::Error;
use tokio_timer::sleep;
#[fbinit::main]
fn main(fb: FacebookInit) -> Result<()> {
panichandler::set_panichandler(panichandler::Fate::Abort);
let matches = setup_app().get_matches();
let (repo_name, config) = cmdlib::args::get_config(fb, &matches)?;
let logger = setup_logger(&matches, repo_name.to_string());
info!(logger, "Hook tailer is starting");
let ctx = CoreContext::new_with_logger(fb, logger.clone());
block_execute(
run_hook_tailer(fb, &ctx, &config, &repo_name, &matches, &logger),
fb,
"hook_tailer",
&logger,
&matches,
cmdlib::monitoring::AliveService,
)
}
async fn run_hook_tailer<'a>(
fb: FacebookInit,
ctx: &CoreContext,
config: &metaconfig_types::RepoConfig,
repo_name: &str,
matches: &'a ArgMatches<'a>,
logger: &Logger,
) -> Result<(), Error> {
let bookmark_name = matches.value_of("bookmark").unwrap();
let bookmark = BookmarkName::new(bookmark_name).unwrap();
let common_config = cmdlib::args::read_common_config(fb, &matches)?;
let init_revision = matches.value_of("init_revision").map(String::from);
let continuous = matches.is_present("continuous");
let limit = cmdlib::args::get_u64(&matches, "limit", 1000);
let changeset = matches.value_of("changeset").map_or(None, |cs| {
Some(HgChangesetId::from_str(cs).expect("Invalid changesetid"))
});
let mut excludes = matches
.values_of("exclude")
.map(|matches| {
matches
.map(|cs| HgChangesetId::from_str(cs).expect("Invalid changeset"))
.collect()
})
.unwrap_or(vec![]);
if let Some(path) = matches.value_of("exclude_file") {
let changesets = BufReader::new(File::open(path)?)
.lines()
.filter_map(|cs_str| {
cs_str
.map_err(Error::from)
.and_then(|cs_str| HgChangesetId::from_str(&cs_str))
.ok()
});
excludes.extend(changesets);
}
let disabled_hooks = cmdlib::args::parse_disabled_hooks_no_repo_prefix(&matches, &logger);
let caching = cmdlib::args::init_cachelib(fb, &matches, None);
let readonly_storage = cmdlib::args::parse_readonly_storage(&matches);
mononoke: introduce BlobrepoBuilder Summary: The goal of the whole stack is quite simple (add reponame field to BlobRepo), but this stack also tries to make it easier to initialize BlobRepo. To do that BlobrepoBuilder was added. It now accepts RepoConfig instead of 6 different fields from RepoConfig - that makes it easier to pass a field from config into BlobRepo. It also allows to customize BlobRepo. Currently it's used just to add redaction override, but later we can extend it for other use cases as well, with the hope that we'll be able to remove a bunch of repo-creation functions from cmdlib. Because of BlobrepoBuilder we no longer need open_blobrepo function. Later we might consider removing open_blobrepo_given_datasources as well. Note that this diff *adds* a few new clones. I don't consider it being a big problem, though I'm curious to hear your thoughts folks. Note that another option for the implementation would be to take a reference to objects instead of taking them by value. I briefly looked into how they used, and lot of them are passed to the objects that actually take ownership of what's inside these config fields. I.e. Blobstore essentially takes ownership of BlobstoreOptions, because it needs to store manifold bucket name. Same for scuba_censored_table, filestore_params, bookmarks_cache_ttl etc. So unless I'm missing anything, we can either pass them as reference and then we'll have to copy them, or we can just pass a value from BlobrepoBuilder directly. Reviewed By: krallin Differential Revision: D20312567 fbshipit-source-id: 14634f5e14f103b110482557254f084da1c725e1
2020-03-09 22:03:07 +03:00
let builder = BlobrepoBuilder::new(
fb,
repo_name.into(),
mononoke: introduce BlobrepoBuilder Summary: The goal of the whole stack is quite simple (add reponame field to BlobRepo), but this stack also tries to make it easier to initialize BlobRepo. To do that BlobrepoBuilder was added. It now accepts RepoConfig instead of 6 different fields from RepoConfig - that makes it easier to pass a field from config into BlobRepo. It also allows to customize BlobRepo. Currently it's used just to add redaction override, but later we can extend it for other use cases as well, with the hope that we'll be able to remove a bunch of repo-creation functions from cmdlib. Because of BlobrepoBuilder we no longer need open_blobrepo function. Later we might consider removing open_blobrepo_given_datasources as well. Note that this diff *adds* a few new clones. I don't consider it being a big problem, though I'm curious to hear your thoughts folks. Note that another option for the implementation would be to take a reference to objects instead of taking them by value. I briefly looked into how they used, and lot of them are passed to the objects that actually take ownership of what's inside these config fields. I.e. Blobstore essentially takes ownership of BlobstoreOptions, because it needs to store manifold bucket name. Same for scuba_censored_table, filestore_params, bookmarks_cache_ttl etc. So unless I'm missing anything, we can either pass them as reference and then we'll have to copy them, or we can just pass a value from BlobrepoBuilder directly. Reviewed By: krallin Differential Revision: D20312567 fbshipit-source-id: 14634f5e14f103b110482557254f084da1c725e1
2020-03-09 22:03:07 +03:00
&config,
cmdlib::args::parse_mysql_options(&matches),
mononoke: don't require cachelib to talk to a remote DB Summary: Currently, we implicitly expect that caching is enabled if we're dealing with a remote repository, but that means cachelib must be enabled when running with a remote repository, and that is ... slow. This can be problematic in two cases: In tests. It makes MySQL tests unbearably slow, and a little more flaky because we end up using so much CPU. With this patch, MySQL tests remain slower than SQLite tests, but by a factor of < 2, which is a pretty substantial improvement. Running trivial administrative commands (e.g. a `mononoke_admin`), notably using a dev build (which right now unbearably slow). With this patch, such a trivial command is about 6x faster: ``` [torozco@devvm4998.lla1 ~/fbcode] time buck-out/gen/scm/mononoke/admin#binary/admin --repo-id 2102 --mononoke-config-path /home/torozco/local/.mononoke_exec/config/PROD --skip-caching bookmarks list --kind publishing Jun 21 08:57:36.658 INFO using repo "instagram-server" repoid RepositoryId(2102) master c96ac4654e4d2da45a9597af859adeac9dba3d7ca964cb42e5c96bc153f185e3 2c5713ad27262b91bf1dfaf21b3cf34fe3926c8d real 0m5.299s user 0m5.097s sys 0m0.699s [torozco@devvm4998.lla1 ~/fbcode] time buck-out/gen/scm/mononoke/admin#binary/admin --repo-id 2102 --mononoke-config-path /home/torozco/local/.mononoke_exec/config/PROD bookmarks list --kind publishing I0621 08:57:59.299988 1181997 CacheAllocator-inl.h:3123] Started worker 'PoolRebalancer' Jun 21 08:57:59.328 INFO using repo "instagram-server" repoid RepositoryId(2102) master c96ac4654e4d2da45a9597af859adeac9dba3d7ca964cb42e5c96bc153f185e3 2c5713ad27262b91bf1dfaf21b3cf34fe3926c8d real 0m28.620s user 0m27.680s sys 0m2.466s ``` This is also nice because it means the MySQL tests won't talk to Memcache anymore. --- Note: in this refactor, I made `Caching` an enum so it can't accidentally be swapped with some other boolean. 
--- Finally, it also uses up quite a bit less RAM (we no longer need 2GB of RAM to output one line of bookmarks — although we're still using quite a bit!): ``` [torozco@devvm4998.lla1 ~/fbcode] env time buck-out/gen/scm/mononoke/admin#binary/admin --skip-caching --repo-id 2102 --mononoke-config-path /home/torozco/local/.mononoke_exec/config/PROD bookmarks list --kind publishing Jun 21 09:18:36.074 INFO using repo "instagram-server" repoid RepositoryId(2102) master abdd2f78dafeaa8d4b96897955a63844b31324f9d89176b3a62088d0e2ae2b22 1702392d14bf7a332bf081518cb1ea3c83a13c39 5.08user 0.68system 0:05.28elapsed 109%CPU (0avgtext+0avgdata 728024maxresident)k 6776inputs+0outputs (8major+115477minor)pagefaults 0swaps [torozco@devvm4998.lla1 ~/fbcode] env time buck-out/gen/scm/mononoke/admin#binary/admin --repo-id 2102 --mononoke-config-path /home/torozco/local/.mononoke_exec/config/PROD bookmarks list --kind publishing I0621 09:19:01.385933 1244489 CacheAllocator-inl.h:3123] Started worker 'PoolRebalancer' Jun 21 09:19:01.412 INFO using repo "instagram-server" repoid RepositoryId(2102) master abdd2f78dafeaa8d4b96897955a63844b31324f9d89176b3a62088d0e2ae2b22 1702392d14bf7a332bf081518cb1ea3c83a13c39 26.96user 2.27system 0:27.93elapsed 104%CPU (0avgtext+0avgdata 2317716maxresident)k 11416inputs+5384outputs (17major+605118minor)pagefaults 0swaps ``` Reviewed By: farnz Differential Revision: D15941644 fbshipit-source-id: 0df4a74ccd0220a786ebf0e883e1a9b8aab0a647
2019-06-24 16:03:31 +03:00
caching,
common_config.scuba_censored_table,
readonly_storage,
cmdlib::args::parse_blobstore_options(&matches),
&logger,
mononoke: introduce BlobrepoBuilder Summary: The goal of the whole stack is quite simple (add reponame field to BlobRepo), but this stack also tries to make it easier to initialize BlobRepo. To do that BlobrepoBuilder was added. It now accepts RepoConfig instead of 6 different fields from RepoConfig - that makes it easier to pass a field from config into BlobRepo. It also allows to customize BlobRepo. Currently it's used just to add redaction override, but later we can extend it for other use cases as well, with the hope that we'll be able to remove a bunch of repo-creation functions from cmdlib. Because of BlobrepoBuilder we no longer need open_blobrepo function. Later we might consider removing open_blobrepo_given_datasources as well. Note that this diff *adds* a few new clones. I don't consider it being a big problem, though I'm curious to hear your thoughts folks. Note that another option for the implementation would be to take a reference to objects instead of taking them by value. I briefly looked into how they used, and lot of them are passed to the objects that actually take ownership of what's inside these config fields. I.e. Blobstore essentially takes ownership of BlobstoreOptions, because it needs to store manifold bucket name. Same for scuba_censored_table, filestore_params, bookmarks_cache_ttl etc. So unless I'm missing anything, we can either pass them as reference and then we'll have to copy them, or we can just pass a value from BlobrepoBuilder directly. Reviewed By: krallin Differential Revision: D20312567 fbshipit-source-id: 14634f5e14f103b110482557254f084da1c725e1
2020-03-09 22:03:07 +03:00
);
let rc = RequestContext {
bucket_name: "mononoke_prod".into(),
api_key: "mononoke_prod-key".into(),
timeout_msec: 10000,
};
let id = "ManifoldBlob";
let manifold_client = ManifoldHttpClient::new(fb, id, rc)?;
let blobrepo = builder.build().await?;
let excl = blobrepo
.get_hg_bonsai_mapping(ctx.clone(), excludes)
.compat()
.await?;
let tail = &Tailer::new(
ctx.clone(),
blobrepo.clone(),
config.clone(),
bookmark,
manifold_client.clone(),
excl.into_iter().map(|(_, cs)| cs).collect(),
&disabled_hooks,
)?;
let f = match init_revision {
Some(init_rev) => {
info!(
*logger,
"Initial revision specified as argument {}", init_rev
);
let hash = HgNodeHash::from_str(&init_rev)?;
let bytes = hash.as_bytes().into();
manifold_client
.write(tail.get_last_rev_key(), bytes)
.map(|_| ())
.compat()
.boxed()
}
None => async { Ok(()) }.boxed(),
};
match (continuous, changeset) {
(true, _) => {
// Tail new commits and run hooks on them
async move {
f.await?;
stream::repeat(())
.map(Ok)
.try_for_each({
move |()| async move {
process_hook_results(tail.run(), logger).await?;
sleep(Duration::new(10, 0))
.map_err(|err| format_err!("Tokio timer error {:?}", err))
.compat()
.await
}
})
.await
}
.boxed()
}
(_, Some(changeset)) => {
process_hook_results(tail.run_single_changeset(changeset), logger).boxed()
}
_ => {
f.await?;
process_hook_results(tail.run_with_limit(limit), logger).boxed()
}
}
.await
}
/// Await a (legacy, 0.1-style) future of hook outcomes, log every result,
/// and return an error if any hook rejected a commit.
async fn process_hook_results(
    fut: BoxFuture<Vec<HookOutcome>, Error>,
    logger: &Logger,
) -> Result<(), Error> {
    let outcomes = fut.compat().await?;
    let mut hooks_stat = HookExecutionStat::new();

    debug!(logger, "==== Hooks results ====");
    for outcome in outcomes {
        hooks_stat.record_hook_execution(&outcome);
        // Rejections are surfaced at info level; accepts only at debug.
        if outcome.is_rejection() {
            info!(logger, "{}", outcome);
        } else {
            debug!(logger, "{}", outcome);
        }
    }

    info!(logger, "==== Hooks stat: {} ====", hooks_stat);
    match hooks_stat.rejected {
        0 => Ok(()),
        rejected => Err(format_err!("Hook rejections: {}", rejected,)),
    }
}
/// Running tally of hook results, split into accepted vs rejected.
#[derive(Default)]
struct HookExecutionStat {
    // Count of outcomes that were not rejections.
    accepted: usize,
    // Count of outcomes that were rejections.
    rejected: usize,
}

impl HookExecutionStat {
    /// Create an empty tally (all counters zero).
    pub fn new() -> Self {
        // Delegate to the derived Default rather than duplicating the
        // zero-initialization by hand.
        Self::default()
    }

    /// Record one hook outcome into the tally.
    pub fn record_hook_execution(&mut self, outcome: &hooks::HookOutcome) {
        if outcome.is_rejection() {
            self.rejected += 1;
        } else {
            self.accepted += 1;
        }
    }
}
impl fmt::Display for HookExecutionStat {
    /// Render the tally as e.g. `accepted: 3, rejected: 1`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "accepted: {}, rejected: {}", self.accepted, self.rejected)
    }
}
/// Build the clap `App` for the hook tailer: the shared Mononoke arguments
/// (advanced ones hidden), the tailer-specific flags below, and the common
/// disabled-hooks arguments appended at the end.
fn setup_app<'a, 'b>() -> App<'a, 'b> {
    let bookmark = Arg::with_name("bookmark")
        .long("bookmark")
        .short("B")
        .help("bookmark to tail")
        .takes_value(true)
        .required(true);
    let changeset = Arg::with_name("changeset")
        .long("changeset")
        .short("c")
        .help("the changeset to run hooks for")
        .takes_value(true);
    let exclude = Arg::with_name("exclude")
        .long("exclude")
        .short("e")
        .multiple(true)
        .help("the changesets to exclude")
        .takes_value(true);
    let exclude_file = Arg::with_name("exclude_file")
        .long("exclude_file")
        .short("f")
        .help("a file containing changesets to exclude that is separated by new lines")
        .takes_value(true);
    let limit = Arg::with_name("limit")
        .long("limit")
        .takes_value(true)
        .help("limit number of commits to process (non-continuous only). Default: 1000");
    let continuous = Arg::with_name("continuous")
        .long("continuous")
        .help("continuously run hooks on new commits");
    let init_revision = Arg::with_name("init_revision")
        .long("init_revision")
        .takes_value(true)
        .help("the initial revision to start at");
    let debug = Arg::with_name("debug")
        .long("debug")
        .short("d")
        .help("print debug level output");

    let app = cmdlib::args::MononokeApp::new("run hooks against repo")
        .with_advanced_args_hidden()
        .build()
        .version("0.0.0")
        .arg(bookmark)
        .arg(changeset)
        .arg(exclude)
        .arg(exclude_file)
        .arg(limit)
        .arg(continuous)
        .arg(init_revision)
        .arg(debug);

    cmdlib::args::add_disabled_hooks_args(app)
}
/// Construct a glog-formatted, stats-wrapped stdout logger tagged with the
/// repo name. Log level is Debug when --debug was passed, Info otherwise.
fn setup_logger<'a>(matches: &ArgMatches<'a>, repo_name: String) -> Logger {
    let level = if matches.is_present("debug") {
        Level::Debug
    } else {
        Level::Info
    };

    // Build the drain chain: plain stdout -> glog formatting -> stats
    // accounting -> level filter.
    let decorator = slog_term::PlainSyncDecorator::new(io::stdout());
    let glog = GlogFormat::new(decorator, kv_categorizer::FacebookCategorizer);
    let drain = slog_stats::StatsDrain::new(glog).filter_level(level);

    Logger::root(
        drain.ignore_res(),
        o!("repo" => repo_name,
           kv_defaults::FacebookKV::new().expect("Failed to initialize logging")),
    )
}
/// Errors specific to the hook tailer binary.
// NOTE(review): no variant is constructed anywhere in this file; presumably
// used by the `tailer` module or external callers — confirm before removing.
#[derive(Debug, Error)]
pub enum ErrorKind {
    /// The requested repository name was not found.
    #[error("No such repo '{0}'")]
    NoSuchRepo(String),
}