mononoke: option for how far back to peek at blobstore queue when scrubbing

Summary:
When scrubbing to a new store in a multiplex that also contains a recently populated store, the ctime of blobs from the recently populated store will often fall inside the healer queue's 7-day lookback window, which results in high query load on the queue.

This diff adds a command line option to override the queue lookback period so that it can be shortened, reducing or eliminating the queue queries.
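
For example, a scrub could restrict the lookback to one day instead of the default seven. The flag names below match this diff (the new flag requires a scrub action to be set); the binary and remaining arguments depend on the scrub job being run, so this is only an illustrative invocation:

    <scrub binary> ... --blobstore-scrub-action=<action> --blobstore-scrub-queue-peek=86400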

Reviewed By: farnz

Differential Revision: D28533393

fbshipit-source-id: fd322364e0f595065fdc7900e86bc7c4e1cfec8a
Alex Hornby 2021-05-21 05:19:48 -07:00 committed by Facebook GitHub Bot
parent fde2eee102
commit fdee7b86e9
5 changed files with 117 additions and 20 deletions


@@ -124,6 +124,18 @@ impl BlobstoreOptions {
self
}
}
pub fn with_scrub_queue_peek_bound(self, queue_peek_bound_secs: u64) -> Self {
if let Some(mut scrub_options) = self.scrub_options {
scrub_options.queue_peek_bound = Some(Duration::from_secs(queue_peek_bound_secs));
Self {
scrub_options: Some(scrub_options),
..self
}
} else {
self
}
}
}
/// Construct a blobstore according to the specification. The multiplexed blobstore
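
Note that the new builder method only takes effect when scrub options are already present on the `BlobstoreOptions` (the `if let Some(..)` above). A minimal sketch of how it composes with the existing setters, assuming a `blobstore_options: BlobstoreOptions` in hand and `scrub_action` parsed as in parse_blobstore_options below:

    // Shorten the healer-queue peek window to one hour when scrubbing.
    // If scrub options are unset, with_scrub_queue_peek_bound leaves the options unchanged.
    let blobstore_options = blobstore_options
        .with_scrub_action(scrub_action)
        .with_scrub_queue_peek_bound(3600);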


@@ -81,6 +81,7 @@ pub struct ScrubOptions {
pub scrub_action: ScrubAction,
pub scrub_grace: Option<Duration>,
pub scrub_action_on_missing_write_mostly: ScrubWriteMostly,
pub queue_peek_bound: Option<Duration>,
}
impl Default for ScrubOptions {
@@ -89,6 +90,7 @@ impl Default for ScrubOptions {
scrub_action: ScrubAction::ReportOnly,
scrub_grace: None,
scrub_action_on_missing_write_mostly: ScrubWriteMostly::Scrub,
queue_peek_bound: Some(*HEAL_MAX_BACKLOG),
}
}
}
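
The default of `Some(*HEAL_MAX_BACKLOG)` preserves the previous behaviour (the 7-day peek window). A rough sketch of how a caller can override it, using the fields shown above and `Duration` from `std::time`:

    // Peek the healer queue only for blobs written within the last 2 hours;
    // a bound of `None` skips the queue peek entirely.
    let scrub_options = ScrubOptions {
        queue_peek_bound: Some(Duration::from_secs(7200)),
        ..ScrubOptions::default()
    };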
@@ -291,13 +293,14 @@ async fn blobstore_get(
|| !missing_main.is_empty()
{
// Only peek the queue if needed
let entries = match ctime_age.as_ref() {
// Avoid false alarms for recently written items still on the healer queue
Some(ctime_age) if ctime_age < &*HEAL_MAX_BACKLOG => {
queue.get(ctx, key).await?
}
_ => vec![],
};
let entries =
match (ctime_age.as_ref(), scrub_options.queue_peek_bound.as_ref()) {
// Avoid false alarms for recently written items still on the healer queue
(Some(ctime_age), Some(bound)) if ctime_age < bound => {
queue.get(ctx, key).await?
}
_ => vec![],
};
// Only attempt the action if we don't know of pending writes from the queue
if entries.is_empty() {
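
With this change the queue is peeked only when both a ctime age and a peek bound are present and the blob is younger than the bound; a `queue_peek_bound` of `None` disables the peek entirely. The decision reduces to something like this illustrative helper (not part of the diff):

    use std::time::Duration;

    fn should_peek_queue(ctime_age: Option<&Duration>, bound: Option<&Duration>) -> bool {
        match (ctime_age, bound) {
            // Recently written blobs may still be on the healer queue, so check it
            (Some(age), Some(bound)) => age < bound,
            // No ctime available, or peeking disabled: skip the queue query
            _ => false,
        }
    }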


@@ -11,7 +11,7 @@ use std::{
future::Future,
pin::Pin,
sync::{Arc, Mutex},
time::SystemTime,
time::{Duration, SystemTime},
};
use crate::base::{MultiplexedBlobstoreBase, MultiplexedBlobstorePutHandler};
@@ -280,6 +280,7 @@ async fn scrub_none(
scrub_action: ScrubAction::ReportOnly,
scrub_grace: None,
scrub_action_on_missing_write_mostly,
queue_peek_bound: None,
},
Arc::new(LoggingScrubHandler::new(false)) as Arc<dyn ScrubHandler>,
);
@@ -672,6 +673,7 @@ async fn scrub_scenarios(fb: FacebookInit, scrub_action_on_missing_write_mostly:
scrub_action: ScrubAction::ReportOnly,
scrub_grace: None,
scrub_action_on_missing_write_mostly,
queue_peek_bound: None,
},
scrub_handler.clone(),
);
@@ -772,8 +774,9 @@ async fn scrub_scenarios(fb: FacebookInit, scrub_action_on_missing_write_mostly:
scrub_action: ScrubAction::Repair,
scrub_grace: None,
scrub_action_on_missing_write_mostly,
queue_peek_bound: None,
},
scrub_handler,
scrub_handler.clone(),
);
// Non-existing key in all blobstores, new blobstore failing
@@ -804,6 +807,59 @@ async fn scrub_scenarios(fb: FacebookInit, scrub_action_on_missing_write_mostly:
assert!(get_fut.await.is_err(), "Empty replacement against error");
}
// One working replica after failure, queue lookback means scrub action not performed
{
// Create with different queue_peek_bound
let bs = ScrubBlobstore::new(
MultiplexId::new(1),
vec![(bid0, bs0.clone()), (bid1, bs1.clone())],
vec![(bid2, bs2.clone())],
nonzero!(1usize),
queue.clone(),
MononokeScubaSampleBuilder::with_discard(),
nonzero!(1u64),
ScrubOptions {
scrub_action: ScrubAction::Repair,
scrub_grace: None,
scrub_action_on_missing_write_mostly,
queue_peek_bound: Some(Duration::from_secs(7200)),
},
scrub_handler,
);
let v1 = make_value("v1");
let k1 = "k1";
// Check there is an entry on the queue
match queue.get(ctx, k1).await.unwrap().as_slice() {
[entry] => {
assert_eq!(entry.blobstore_id, bid0, "Queue bad");
}
_ => panic!("only one entry expected"),
}
// bs1 and bs2 empty at this point
assert_eq!(bs0.get_bytes(k1), Some(v1.clone()));
assert!(bs1.storage.with(|s| s.is_empty()));
assert!(bs2.storage.with(|s| s.is_empty()));
let mut get_fut = bs.get(ctx, k1).map_err(|_| ()).boxed();
assert_eq!(PollOnce::new(Pin::new(&mut get_fut)).await, Poll::Pending);
// tick the gets
bs0.tick(None);
assert_eq!(PollOnce::new(Pin::new(&mut get_fut)).await, Poll::Pending);
bs1.tick(None);
if scrub_action_on_missing_write_mostly != ScrubWriteMostly::PopulateIfAbsent {
// this read doesn't happen in this mode
bs2.tick(None);
}
// No repairs to tick, as it's on the queue within the peek lookback
// Succeeds
assert_eq!(get_fut.await.unwrap().map(|v| v.into()), Some(v1.clone()));
// bs1 and bs2 still empty at this point. The assumption is that the item on the queue will be healed later.
assert_eq!(bs0.get_bytes(k1), Some(v1.clone()));
assert!(bs1.storage.with(|s| s.is_empty()));
assert!(bs2.storage.with(|s| s.is_empty()));
}
// One working replica after failure.
{
let v1 = make_value("v1");


@@ -73,6 +73,7 @@ pub const BLOBSTORE_PUT_BEHAVIOUR_ARG: &str = "blobstore-put-behaviour";
pub const BLOBSTORE_SCRUB_ACTION_ARG: &str = "blobstore-scrub-action";
pub const BLOBSTORE_SCRUB_GRACE_ARG: &str = "blobstore-scrub-grace";
pub const BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG: &str = "blobstore-scrub-write-mostly-missing";
pub const BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG: &str = "blobstore-scrub-queue-peek";
pub const WITH_READONLY_STORAGE_ARG: &str = "with-readonly-storage";
@@ -189,6 +190,9 @@ pub struct MononokeAppBuilder {
// Whether to report missing keys in write mostly blobstores as a scrub action when scrubbing
scrub_action_on_missing_write_mostly_default: Option<ScrubWriteMostly>,
// Whether to set a default for how long to peek back at the multiplex queue when scrubbing
scrub_queue_peek_bound_secs_default: Option<u64>,
}
/// Things we want to live for the lifetime of the mononoke binary
@@ -282,6 +286,7 @@ impl MononokeAppBuilder {
scrub_action_default: None,
scrub_grace_secs_default: None,
scrub_action_on_missing_write_mostly_default: None,
scrub_queue_peek_bound_secs_default: None,
}
}
@@ -766,6 +771,19 @@ impl MononokeAppBuilder {
scrub_grace_arg = scrub_grace_arg
.default_value(&FORMATTED.get_or_init(|| format!("{}", default)));
}
let mut scrub_queue_peek_bound_arg = Arg::with_name(
BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG,
)
.long(BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG)
.takes_value(true)
.required(false)
.requires(BLOBSTORE_SCRUB_ACTION_ARG)
.help("Number of seconds within which we consider it worth checking the healer queue.");
if let Some(default) = self.scrub_queue_peek_bound_secs_default {
static FORMATTED: OnceCell<String> = OnceCell::new(); // Lazy static is nicer to LeakSanitizer than Box::leak
scrub_queue_peek_bound_arg = scrub_queue_peek_bound_arg
.default_value(&FORMATTED.get_or_init(|| format!("{}", default)));
};
let mut scrub_action_on_missing_write_mostly_arg =
Arg::with_name(BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG)
.long(BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG)
@@ -782,6 +800,7 @@ impl MononokeAppBuilder {
app.arg(scrub_action_arg)
.arg(scrub_grace_arg)
.arg(scrub_action_on_missing_write_mostly_arg)
.arg(scrub_queue_peek_bound_arg)
} else {
app
}


@@ -50,13 +50,13 @@ use super::{
app::{
ArgType, MononokeAppData, BLOBSTORE_BYTES_MIN_THROTTLE_ARG, BLOBSTORE_PUT_BEHAVIOUR_ARG,
BLOBSTORE_SCRUB_ACTION_ARG, BLOBSTORE_SCRUB_GRACE_ARG,
BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG, CACHELIB_ATTEMPT_ZSTD_ARG, CRYPTO_PATH_REGEX_ARG,
DISABLE_TUNABLES, ENABLE_MCROUTER, LOCAL_CONFIGERATOR_PATH_ARG, LOG_EXCLUDE_TAG,
LOG_INCLUDE_TAG, MANIFOLD_API_KEY_ARG, MANIFOLD_THRIFT_OPS_ARG,
MANIFOLD_WEAK_CONSISTENCY_MS_ARG, MYSQL_CONN_OPEN_TIMEOUT, MYSQL_MASTER_ONLY,
MYSQL_MAX_QUERY_TIME, MYSQL_POOL_AGE_TIMEOUT, MYSQL_POOL_IDLE_TIMEOUT, MYSQL_POOL_LIMIT,
MYSQL_POOL_PER_KEY_LIMIT, MYSQL_POOL_THREADS_NUM, MYSQL_SQLBLOB_POOL_AGE_TIMEOUT,
MYSQL_SQLBLOB_POOL_IDLE_TIMEOUT, MYSQL_SQLBLOB_POOL_LIMIT,
BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG, BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG,
CACHELIB_ATTEMPT_ZSTD_ARG, CRYPTO_PATH_REGEX_ARG, DISABLE_TUNABLES, ENABLE_MCROUTER,
LOCAL_CONFIGERATOR_PATH_ARG, LOG_EXCLUDE_TAG, LOG_INCLUDE_TAG, MANIFOLD_API_KEY_ARG,
MANIFOLD_THRIFT_OPS_ARG, MANIFOLD_WEAK_CONSISTENCY_MS_ARG, MYSQL_CONN_OPEN_TIMEOUT,
MYSQL_MASTER_ONLY, MYSQL_MAX_QUERY_TIME, MYSQL_POOL_AGE_TIMEOUT, MYSQL_POOL_IDLE_TIMEOUT,
MYSQL_POOL_LIMIT, MYSQL_POOL_PER_KEY_LIMIT, MYSQL_POOL_THREADS_NUM,
MYSQL_SQLBLOB_POOL_AGE_TIMEOUT, MYSQL_SQLBLOB_POOL_IDLE_TIMEOUT, MYSQL_SQLBLOB_POOL_LIMIT,
MYSQL_SQLBLOB_POOL_PER_KEY_LIMIT, MYSQL_SQLBLOB_POOL_THREADS_NUM, READ_BURST_BYTES_ARG,
READ_BYTES_ARG, READ_CHAOS_ARG, READ_QPS_ARG, RENDEZVOUS_FREE_CONNECTIONS, RUNTIME_THREADS,
TUNABLES_CONFIG, WITH_DYNAMIC_OBSERVABILITY, WITH_READONLY_STORAGE_ARG,
@@ -670,18 +670,25 @@ fn parse_blobstore_options(
.value_of(BLOBSTORE_SCRUB_GRACE_ARG)
.map(u64::from_str)
.transpose()?;
let scrub_action_on_missing_write_mostly = matches
.value_of(BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG)
.map(ScrubWriteMostly::from_str)
.transpose()?;
let blobstore_options = blobstore_options
let mut blobstore_options = blobstore_options
.with_scrub_action(scrub_action)
.with_scrub_grace(scrub_grace);
if let Some(v) = scrub_action_on_missing_write_mostly {
blobstore_options.with_scrub_action_on_missing_write_mostly(v)
} else {
blobstore_options
blobstore_options = blobstore_options.with_scrub_action_on_missing_write_mostly(v)
}
let scrub_queue_peek_bound = matches
.value_of(BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG)
.map(u64::from_str)
.transpose()?;
if let Some(v) = scrub_queue_peek_bound {
blobstore_options = blobstore_options.with_scrub_queue_peek_bound(v)
}
blobstore_options
} else {
blobstore_options
};