mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 08:47:12 +03:00
mononoke: option for how far back to peek at blobstore queue when scrubbing
Summary: When scrubbing to a new store in a multiplex that also has a recently populated store, the ctime of blobs from the recently populated store will often fall inside the 7-day lookback window of the healer queue, which results in a high query load on the queue. This diff adds a command line option to override the queue lookback period so that it can be made shorter, reducing or eliminating the queue queries. Reviewed By: farnz Differential Revision: D28533393 fbshipit-source-id: fd322364e0f595065fdc7900e86bc7c4e1cfec8a
This commit is contained in:
parent
fde2eee102
commit
fdee7b86e9
@ -124,6 +124,18 @@ impl BlobstoreOptions {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_scrub_queue_peek_bound(self, queue_peek_bound_secs: u64) -> Self {
|
||||
if let Some(mut scrub_options) = self.scrub_options {
|
||||
scrub_options.queue_peek_bound = Some(Duration::from_secs(queue_peek_bound_secs));
|
||||
Self {
|
||||
scrub_options: Some(scrub_options),
|
||||
..self
|
||||
}
|
||||
} else {
|
||||
self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a blobstore according to the specification. The multiplexed blobstore
|
||||
|
@ -81,6 +81,7 @@ pub struct ScrubOptions {
|
||||
pub scrub_action: ScrubAction,
|
||||
pub scrub_grace: Option<Duration>,
|
||||
pub scrub_action_on_missing_write_mostly: ScrubWriteMostly,
|
||||
pub queue_peek_bound: Option<Duration>,
|
||||
}
|
||||
|
||||
impl Default for ScrubOptions {
|
||||
@ -89,6 +90,7 @@ impl Default for ScrubOptions {
|
||||
scrub_action: ScrubAction::ReportOnly,
|
||||
scrub_grace: None,
|
||||
scrub_action_on_missing_write_mostly: ScrubWriteMostly::Scrub,
|
||||
queue_peek_bound: Some(*HEAL_MAX_BACKLOG),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -291,13 +293,14 @@ async fn blobstore_get(
|
||||
|| !missing_main.is_empty()
|
||||
{
|
||||
// Only peek the queue if needed
|
||||
let entries = match ctime_age.as_ref() {
|
||||
// Avoid false alarms for recently written items still on the healer queue
|
||||
Some(ctime_age) if ctime_age < &*HEAL_MAX_BACKLOG => {
|
||||
queue.get(ctx, key).await?
|
||||
}
|
||||
_ => vec![],
|
||||
};
|
||||
let entries =
|
||||
match (ctime_age.as_ref(), scrub_options.queue_peek_bound.as_ref()) {
|
||||
// Avoid false alarms for recently written items still on the healer queue
|
||||
(Some(ctime_age), Some(bound)) if ctime_age < bound => {
|
||||
queue.get(ctx, key).await?
|
||||
}
|
||||
_ => vec![],
|
||||
};
|
||||
|
||||
// Only attempt the action if we don't know of pending writes from the queue
|
||||
if entries.is_empty() {
|
||||
|
@ -11,7 +11,7 @@ use std::{
|
||||
future::Future,
|
||||
pin::Pin,
|
||||
sync::{Arc, Mutex},
|
||||
time::SystemTime,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
use crate::base::{MultiplexedBlobstoreBase, MultiplexedBlobstorePutHandler};
|
||||
@ -280,6 +280,7 @@ async fn scrub_none(
|
||||
scrub_action: ScrubAction::ReportOnly,
|
||||
scrub_grace: None,
|
||||
scrub_action_on_missing_write_mostly,
|
||||
queue_peek_bound: None,
|
||||
},
|
||||
Arc::new(LoggingScrubHandler::new(false)) as Arc<dyn ScrubHandler>,
|
||||
);
|
||||
@ -672,6 +673,7 @@ async fn scrub_scenarios(fb: FacebookInit, scrub_action_on_missing_write_mostly:
|
||||
scrub_action: ScrubAction::ReportOnly,
|
||||
scrub_grace: None,
|
||||
scrub_action_on_missing_write_mostly,
|
||||
queue_peek_bound: None,
|
||||
},
|
||||
scrub_handler.clone(),
|
||||
);
|
||||
@ -772,8 +774,9 @@ async fn scrub_scenarios(fb: FacebookInit, scrub_action_on_missing_write_mostly:
|
||||
scrub_action: ScrubAction::Repair,
|
||||
scrub_grace: None,
|
||||
scrub_action_on_missing_write_mostly,
|
||||
queue_peek_bound: None,
|
||||
},
|
||||
scrub_handler,
|
||||
scrub_handler.clone(),
|
||||
);
|
||||
|
||||
// Non-existing key in all blobstores, new blobstore failing
|
||||
@ -804,6 +807,59 @@ async fn scrub_scenarios(fb: FacebookInit, scrub_action_on_missing_write_mostly:
|
||||
assert!(get_fut.await.is_err(), "Empty replacement against error");
|
||||
}
|
||||
|
||||
// One working replica after failure, queue lookback means scrub action not performed
|
||||
{
|
||||
// Create with different queue_peek_bound
|
||||
let bs = ScrubBlobstore::new(
|
||||
MultiplexId::new(1),
|
||||
vec![(bid0, bs0.clone()), (bid1, bs1.clone())],
|
||||
vec![(bid2, bs2.clone())],
|
||||
nonzero!(1usize),
|
||||
queue.clone(),
|
||||
MononokeScubaSampleBuilder::with_discard(),
|
||||
nonzero!(1u64),
|
||||
ScrubOptions {
|
||||
scrub_action: ScrubAction::Repair,
|
||||
scrub_grace: None,
|
||||
scrub_action_on_missing_write_mostly,
|
||||
queue_peek_bound: Some(Duration::from_secs(7200)),
|
||||
},
|
||||
scrub_handler,
|
||||
);
|
||||
let v1 = make_value("v1");
|
||||
let k1 = "k1";
|
||||
// Check there is an entry on the queue
|
||||
match queue.get(ctx, k1).await.unwrap().as_slice() {
|
||||
[entry] => {
|
||||
assert_eq!(entry.blobstore_id, bid0, "Queue bad");
|
||||
}
|
||||
_ => panic!("only one entry expected"),
|
||||
}
|
||||
// bs1 and bs2 empty at this point
|
||||
assert_eq!(bs0.get_bytes(k1), Some(v1.clone()));
|
||||
assert!(bs1.storage.with(|s| s.is_empty()));
|
||||
assert!(bs2.storage.with(|s| s.is_empty()));
|
||||
let mut get_fut = bs.get(ctx, k1).map_err(|_| ()).boxed();
|
||||
assert_eq!(PollOnce::new(Pin::new(&mut get_fut)).await, Poll::Pending);
|
||||
// tick the gets
|
||||
bs0.tick(None);
|
||||
assert_eq!(PollOnce::new(Pin::new(&mut get_fut)).await, Poll::Pending);
|
||||
bs1.tick(None);
|
||||
if scrub_action_on_missing_write_mostly != ScrubWriteMostly::PopulateIfAbsent {
|
||||
// this read doesn't happen in this mode
|
||||
bs2.tick(None);
|
||||
}
|
||||
// No repairs to tick, as it's on the queue within the peek lookback
|
||||
|
||||
// Succeeds
|
||||
assert_eq!(get_fut.await.unwrap().map(|v| v.into()), Some(v1.clone()));
|
||||
|
||||
// bs1 and bs2 still empty at this point. assumption is item on queue will be healed later.
|
||||
assert_eq!(bs0.get_bytes(k1), Some(v1.clone()));
|
||||
assert!(bs1.storage.with(|s| s.is_empty()));
|
||||
assert!(bs2.storage.with(|s| s.is_empty()));
|
||||
}
|
||||
|
||||
// One working replica after failure.
|
||||
{
|
||||
let v1 = make_value("v1");
|
||||
|
@ -73,6 +73,7 @@ pub const BLOBSTORE_PUT_BEHAVIOUR_ARG: &str = "blobstore-put-behaviour";
|
||||
pub const BLOBSTORE_SCRUB_ACTION_ARG: &str = "blobstore-scrub-action";
|
||||
pub const BLOBSTORE_SCRUB_GRACE_ARG: &str = "blobstore-scrub-grace";
|
||||
pub const BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG: &str = "blobstore-scrub-write-mostly-missing";
|
||||
pub const BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG: &str = "blobstore-scrub-queue-peek";
|
||||
|
||||
pub const WITH_READONLY_STORAGE_ARG: &str = "with-readonly-storage";
|
||||
|
||||
@ -189,6 +190,9 @@ pub struct MononokeAppBuilder {
|
||||
|
||||
// Whether to report missing keys in write mostly blobstores as a scrub action when scrubbing
|
||||
scrub_action_on_missing_write_mostly_default: Option<ScrubWriteMostly>,
|
||||
|
||||
// Whether to set a default for how long to peek back at the multiplex queue when scrubbing
|
||||
scrub_queue_peek_bound_secs_default: Option<u64>,
|
||||
}
|
||||
|
||||
/// Things we want to live for the lifetime of the mononoke binary
|
||||
@ -282,6 +286,7 @@ impl MononokeAppBuilder {
|
||||
scrub_action_default: None,
|
||||
scrub_grace_secs_default: None,
|
||||
scrub_action_on_missing_write_mostly_default: None,
|
||||
scrub_queue_peek_bound_secs_default: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -766,6 +771,19 @@ impl MononokeAppBuilder {
|
||||
scrub_grace_arg = scrub_grace_arg
|
||||
.default_value(&FORMATTED.get_or_init(|| format!("{}", default)));
|
||||
}
|
||||
let mut scrub_queue_peek_bound_arg = Arg::with_name(
|
||||
BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG,
|
||||
)
|
||||
.long(BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG)
|
||||
.takes_value(true)
|
||||
.required(false)
|
||||
.requires(BLOBSTORE_SCRUB_ACTION_ARG)
|
||||
.help("Number of seconds within which we consider it worth checking the healer queue.");
|
||||
if let Some(default) = self.scrub_queue_peek_bound_secs_default {
|
||||
static FORMATTED: OnceCell<String> = OnceCell::new(); // Lazy static is nicer to LeakSanitizer than Box::leak
|
||||
scrub_queue_peek_bound_arg = scrub_queue_peek_bound_arg
|
||||
.default_value(&FORMATTED.get_or_init(|| format!("{}", default)));
|
||||
};
|
||||
let mut scrub_action_on_missing_write_mostly_arg =
|
||||
Arg::with_name(BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG)
|
||||
.long(BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG)
|
||||
@ -782,6 +800,7 @@ impl MononokeAppBuilder {
|
||||
app.arg(scrub_action_arg)
|
||||
.arg(scrub_grace_arg)
|
||||
.arg(scrub_action_on_missing_write_mostly_arg)
|
||||
.arg(scrub_queue_peek_bound_arg)
|
||||
} else {
|
||||
app
|
||||
}
|
||||
|
@ -50,13 +50,13 @@ use super::{
|
||||
app::{
|
||||
ArgType, MononokeAppData, BLOBSTORE_BYTES_MIN_THROTTLE_ARG, BLOBSTORE_PUT_BEHAVIOUR_ARG,
|
||||
BLOBSTORE_SCRUB_ACTION_ARG, BLOBSTORE_SCRUB_GRACE_ARG,
|
||||
BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG, CACHELIB_ATTEMPT_ZSTD_ARG, CRYPTO_PATH_REGEX_ARG,
|
||||
DISABLE_TUNABLES, ENABLE_MCROUTER, LOCAL_CONFIGERATOR_PATH_ARG, LOG_EXCLUDE_TAG,
|
||||
LOG_INCLUDE_TAG, MANIFOLD_API_KEY_ARG, MANIFOLD_THRIFT_OPS_ARG,
|
||||
MANIFOLD_WEAK_CONSISTENCY_MS_ARG, MYSQL_CONN_OPEN_TIMEOUT, MYSQL_MASTER_ONLY,
|
||||
MYSQL_MAX_QUERY_TIME, MYSQL_POOL_AGE_TIMEOUT, MYSQL_POOL_IDLE_TIMEOUT, MYSQL_POOL_LIMIT,
|
||||
MYSQL_POOL_PER_KEY_LIMIT, MYSQL_POOL_THREADS_NUM, MYSQL_SQLBLOB_POOL_AGE_TIMEOUT,
|
||||
MYSQL_SQLBLOB_POOL_IDLE_TIMEOUT, MYSQL_SQLBLOB_POOL_LIMIT,
|
||||
BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG, BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG,
|
||||
CACHELIB_ATTEMPT_ZSTD_ARG, CRYPTO_PATH_REGEX_ARG, DISABLE_TUNABLES, ENABLE_MCROUTER,
|
||||
LOCAL_CONFIGERATOR_PATH_ARG, LOG_EXCLUDE_TAG, LOG_INCLUDE_TAG, MANIFOLD_API_KEY_ARG,
|
||||
MANIFOLD_THRIFT_OPS_ARG, MANIFOLD_WEAK_CONSISTENCY_MS_ARG, MYSQL_CONN_OPEN_TIMEOUT,
|
||||
MYSQL_MASTER_ONLY, MYSQL_MAX_QUERY_TIME, MYSQL_POOL_AGE_TIMEOUT, MYSQL_POOL_IDLE_TIMEOUT,
|
||||
MYSQL_POOL_LIMIT, MYSQL_POOL_PER_KEY_LIMIT, MYSQL_POOL_THREADS_NUM,
|
||||
MYSQL_SQLBLOB_POOL_AGE_TIMEOUT, MYSQL_SQLBLOB_POOL_IDLE_TIMEOUT, MYSQL_SQLBLOB_POOL_LIMIT,
|
||||
MYSQL_SQLBLOB_POOL_PER_KEY_LIMIT, MYSQL_SQLBLOB_POOL_THREADS_NUM, READ_BURST_BYTES_ARG,
|
||||
READ_BYTES_ARG, READ_CHAOS_ARG, READ_QPS_ARG, RENDEZVOUS_FREE_CONNECTIONS, RUNTIME_THREADS,
|
||||
TUNABLES_CONFIG, WITH_DYNAMIC_OBSERVABILITY, WITH_READONLY_STORAGE_ARG,
|
||||
@ -670,18 +670,25 @@ fn parse_blobstore_options(
|
||||
.value_of(BLOBSTORE_SCRUB_GRACE_ARG)
|
||||
.map(u64::from_str)
|
||||
.transpose()?;
|
||||
|
||||
let scrub_action_on_missing_write_mostly = matches
|
||||
.value_of(BLOBSTORE_SCRUB_WRITE_MOSTLY_MISSING_ARG)
|
||||
.map(ScrubWriteMostly::from_str)
|
||||
.transpose()?;
|
||||
let blobstore_options = blobstore_options
|
||||
let mut blobstore_options = blobstore_options
|
||||
.with_scrub_action(scrub_action)
|
||||
.with_scrub_grace(scrub_grace);
|
||||
if let Some(v) = scrub_action_on_missing_write_mostly {
|
||||
blobstore_options.with_scrub_action_on_missing_write_mostly(v)
|
||||
} else {
|
||||
blobstore_options
|
||||
blobstore_options = blobstore_options.with_scrub_action_on_missing_write_mostly(v)
|
||||
}
|
||||
let scrub_queue_peek_bound = matches
|
||||
.value_of(BLOBSTORE_SCRUB_QUEUE_PEEK_BOUND_ARG)
|
||||
.map(u64::from_str)
|
||||
.transpose()?;
|
||||
if let Some(v) = scrub_queue_peek_bound {
|
||||
blobstore_options = blobstore_options.with_scrub_queue_peek_bound(v)
|
||||
}
|
||||
blobstore_options
|
||||
} else {
|
||||
blobstore_options
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user