mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 00:45:18 +03:00
mononoke/blobstore_healer: handle missing regions gracefully
Summary: There are a few things broken with common/rust/sql and the blobstore healer's handling of replication lag right now: - If Seconds_Behind_Master isn't an int (it'll be NULL if replication is paused), it just panics. - If it's talking to a server that it expected to be a replica but is a master, it returns None for the replication lag, but 0 would be more appropriate. - If a region no longer has a replica, it errors out. This diff fixes that: - If replication is paused, we return None for lag. - If we're talking to a master, we return 0. - If a region has no replica, we ignore it. Reviewed By: StanislavGlebik Differential Revision: D17787580 fbshipit-source-id: 9e5e7682456870b88910afec12e1c409fd8c5ba6
This commit is contained in:
parent
91fd7e2035
commit
6c29aad4a0
@ -27,6 +27,7 @@ use futures_ext::{spawn_future, BoxFuture, FutureExt};
|
||||
use healer::Healer;
|
||||
use manifoldblob::ThriftManifoldBlob;
|
||||
use metaconfig_types::{BlobConfig, MetadataDBConfig, StorageConfig};
|
||||
use mysql_async::error::Error as MysqlAsyncError;
|
||||
use prefixblob::PrefixBlobstore;
|
||||
use slog::{error, info, o, Logger};
|
||||
use sql::{myrouter, Connection};
|
||||
@ -192,14 +193,30 @@ fn ensure_small_db_replication_lag(
|
||||
.iter()
|
||||
.map(|(region, conn)| {
|
||||
cloned!(region);
|
||||
conn.show_replica_lag_secs().and_then(|maybe_secs| {
|
||||
maybe_secs
|
||||
.ok_or(format_err!(
|
||||
"Could not fetch db replication lag for {}. Failing to avoid overloading db",
|
||||
region
|
||||
))
|
||||
.map(|lag_secs| (region, lag_secs))
|
||||
})
|
||||
|
||||
conn.show_replica_lag_secs()
|
||||
.or_else(|err| match err.downcast_ref::<MysqlAsyncError>() {
|
||||
Some(MysqlAsyncError::Server(inner)) => {
|
||||
// 1918 is discovery failed (i.e. there is no server matching the
|
||||
// constraints). This is fine, that means we don't need to monitor it.
|
||||
if inner.code == 1918 {
|
||||
Ok(Some(0))
|
||||
} else {
|
||||
Err(err)
|
||||
}
|
||||
},
|
||||
_ => Err(err),
|
||||
})
|
||||
.and_then(|maybe_secs| {
|
||||
let err = format_err!(
|
||||
"Could not fetch db replication lag for {}. Failing to avoid overloading db",
|
||||
region
|
||||
);
|
||||
|
||||
maybe_secs
|
||||
.ok_or(err)
|
||||
.map(|lag_secs| (region, lag_secs))
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user