mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 08:47:12 +03:00
mononoke/blobstore_healer: add comments and type annotations
Summary: Basically notes I took for myself to truly understand the code. Reviewed By: StanislavGlebik Differential Revision: D15908406 fbshipit-source-id: 3f21f7a1ddce8e15ceeeffdb5518fd7f5b1749c4
This commit is contained in:
parent
978242fb35
commit
3d27faba08
@ -55,13 +55,15 @@ impl Healer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Heal one batch of entries. It selects a set of entries which are not too young (bounded
|
||||||
|
/// by ENTRY_HEALING_MIN_AGE) up to `blobstore_sync_queue_limit` at once.
|
||||||
pub fn heal(&self, ctx: CoreContext) -> impl Future<Item = (), Error = Error> {
|
pub fn heal(&self, ctx: CoreContext) -> impl Future<Item = (), Error = Error> {
|
||||||
cloned!(
|
cloned!(
|
||||||
self.logger,
|
self.logger,
|
||||||
self.blobstore_sync_queue_limit,
|
self.blobstore_sync_queue_limit,
|
||||||
self.rate_limiter,
|
self.rate_limiter,
|
||||||
self.sync_queue,
|
self.sync_queue,
|
||||||
self.blobstores
|
self.blobstores,
|
||||||
);
|
);
|
||||||
|
|
||||||
let now = DateTime::now().into_chrono();
|
let now = DateTime::now().into_chrono();
|
||||||
@ -73,7 +75,7 @@ impl Healer {
|
|||||||
healing_deadline.clone(),
|
healing_deadline.clone(),
|
||||||
blobstore_sync_queue_limit,
|
blobstore_sync_queue_limit,
|
||||||
)
|
)
|
||||||
.and_then(move |queue_entries| {
|
.and_then(move |queue_entries: Vec<BlobstoreSyncQueueEntry>| {
|
||||||
cloned!(rate_limiter);
|
cloned!(rate_limiter);
|
||||||
|
|
||||||
let healing_futures: Vec<_> = queue_entries
|
let healing_futures: Vec<_> = queue_entries
|
||||||
@ -100,17 +102,20 @@ impl Healer {
|
|||||||
healing_futures.len()
|
healing_futures.len()
|
||||||
);
|
);
|
||||||
|
|
||||||
futures::stream::futures_unordered(healing_futures.into_iter())
|
futures::stream::futures_unordered(healing_futures)
|
||||||
.collect()
|
.collect()
|
||||||
.and_then(move |cleaned_entries| {
|
.and_then(move |cleaned_entries: Vec<Vec<BlobstoreSyncQueueEntry>>| {
|
||||||
let v = cleaned_entries.into_iter().flatten().collect();
|
let cleaned = cleaned_entries.into_iter().flatten().collect();
|
||||||
cleanup_after_healing(ctx, sync_queue, v)
|
cleanup_after_healing(ctx, sync_queue, cleaned)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.map(|_| ())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Heal an individual blob. The `entries` are the blobstores which have successfully stored
|
||||||
|
/// this blob; we need to replicate them onto the remaining `blobstores`. If the blob is not
|
||||||
|
/// yet eligible (too young), then just return None, otherwise we return the healed entries
|
||||||
|
/// which have now been dealt with.
|
||||||
fn heal_blob(
|
fn heal_blob(
|
||||||
ctx: CoreContext,
|
ctx: CoreContext,
|
||||||
sync_queue: Arc<dyn BlobstoreSyncQueue>,
|
sync_queue: Arc<dyn BlobstoreSyncQueue>,
|
||||||
@ -122,7 +127,7 @@ fn heal_blob(
|
|||||||
let seen_blobstores: HashSet<_> = entries
|
let seen_blobstores: HashSet<_> = entries
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|entry| {
|
.filter_map(|entry| {
|
||||||
let id = entry.blobstore_id.clone();
|
let id = entry.blobstore_id;
|
||||||
if blobstores.contains_key(&id) {
|
if blobstores.contains_key(&id) {
|
||||||
Some(id)
|
Some(id)
|
||||||
} else {
|
} else {
|
||||||
@ -167,21 +172,25 @@ fn heal_blob(
|
|||||||
.map(|bid| {
|
.map(|bid| {
|
||||||
let blobstore = blobstores
|
let blobstore = blobstores
|
||||||
.get(&bid)
|
.get(&bid)
|
||||||
.expect("missing_blobstores contains only existing blobstores");
|
.expect("missing_blobstores contains unknown blobstore?");
|
||||||
blobstore
|
blobstore
|
||||||
.put(ctx.clone(), key.clone(), blob.clone())
|
.put(ctx.clone(), key.clone(), blob.clone())
|
||||||
.then(move |result| Ok((bid, result.is_ok())))
|
.then(move |result| Ok((bid, result.is_ok())))
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
// XXX(jsgf) Don't really understand this. I'd expect it to filter the missing_blobstores
|
||||||
|
// by put_ok, and then return only those (ie, leave the entries which didn't store
|
||||||
|
// correctly in the queue). This logic seems to report success if everything was successful,
|
||||||
|
// otherwise it re-puts the successes into the queue (via report_partial_heal), and returns
|
||||||
|
// an empty "to be cleaned" vector.
|
||||||
join_all(heal_blobstores).and_then(move |heal_results| {
|
join_all(heal_blobstores).and_then(move |heal_results| {
|
||||||
if heal_results.iter().all(|(_, result)| *result) {
|
if heal_results.iter().all(|(_, put_ok)| *put_ok) {
|
||||||
futures::future::ok(entries).left_future()
|
futures::future::ok(entries).left_future()
|
||||||
} else {
|
} else {
|
||||||
let healed_blobstores =
|
let healed_blobstores = heal_results
|
||||||
heal_results
|
.into_iter()
|
||||||
.into_iter()
|
.filter_map(|(id, put_ok)| Some(id).filter(|_| put_ok));
|
||||||
.filter_map(|(id, result)| if result { Some(id) } else { None });
|
|
||||||
report_partial_heal(ctx, sync_queue, key, healed_blobstores)
|
report_partial_heal(ctx, sync_queue, key, healed_blobstores)
|
||||||
.map(|_| vec![])
|
.map(|_| vec![])
|
||||||
.right_future()
|
.right_future()
|
||||||
@ -192,6 +201,11 @@ fn heal_blob(
|
|||||||
Some(heal_future.right_future())
|
Some(heal_future.right_future())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetch a blob by `key` from one of the `seen_blobstores`. This tries them one at a time
|
||||||
|
/// sequentially, until either it finds the entry or it fails.
|
||||||
|
/// TODO: if one of the blobstores returns "not found" (None) rather than an error (or success),
|
||||||
|
/// we should add that blobstore to the missing set. (Currently it just fails, which will not
|
||||||
|
/// be recoverable.)
|
||||||
fn fetch_blob(
|
fn fetch_blob(
|
||||||
ctx: CoreContext,
|
ctx: CoreContext,
|
||||||
blobstores: Arc<HashMap<BlobstoreId, Arc<dyn Blobstore>>>,
|
blobstores: Arc<HashMap<BlobstoreId, Arc<dyn Blobstore>>>,
|
||||||
@ -224,7 +238,7 @@ fn fetch_blob(
|
|||||||
Err(_) => return Ok(Loop::Continue(blobstores_to_fetch)),
|
Err(_) => return Ok(Loop::Continue(blobstores_to_fetch)),
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
return Err(format_err!(
|
return Err(format_err!(
|
||||||
"Blobstore {:?} retruned None even though it should contain data",
|
"Blobstore {:?} returned None even though it should contain data",
|
||||||
bid
|
bid
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
@ -236,6 +250,7 @@ fn fetch_blob(
|
|||||||
.from_err()
|
.from_err()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Remove healed entries from the queue.
|
||||||
fn cleanup_after_healing(
|
fn cleanup_after_healing(
|
||||||
ctx: CoreContext,
|
ctx: CoreContext,
|
||||||
sync_queue: Arc<dyn BlobstoreSyncQueue>,
|
sync_queue: Arc<dyn BlobstoreSyncQueue>,
|
||||||
@ -244,6 +259,7 @@ fn cleanup_after_healing(
|
|||||||
sync_queue.del(ctx, entries)
|
sync_queue.del(ctx, entries)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ??? Don't understand this. This is putting the entries we healed back into the queue?
|
||||||
fn report_partial_heal(
|
fn report_partial_heal(
|
||||||
ctx: CoreContext,
|
ctx: CoreContext,
|
||||||
sync_queue: Arc<dyn BlobstoreSyncQueue>,
|
sync_queue: Arc<dyn BlobstoreSyncQueue>,
|
||||||
|
@ -57,9 +57,9 @@ fn maybe_schedule_healer_for_storage(
|
|||||||
_ => bail_msg!("Repo doesn't use Multiplexed blobstore"),
|
_ => bail_msg!("Repo doesn't use Multiplexed blobstore"),
|
||||||
};
|
};
|
||||||
|
|
||||||
let blobstores = {
|
let blobstores: HashMap<_, BoxFuture<Arc<dyn Blobstore + 'static>, _>> = {
|
||||||
let mut blobstores = HashMap::new();
|
let mut blobstores = HashMap::new();
|
||||||
for (id, args) in blobstores_args.into_iter() {
|
for (id, args) in blobstores_args {
|
||||||
match args {
|
match args {
|
||||||
BlobConfig::Manifold { bucket, prefix } => {
|
BlobConfig::Manifold { bucket, prefix } => {
|
||||||
let blobstore = ThriftManifoldBlob::new(bucket)
|
let blobstore = ThriftManifoldBlob::new(bucket)
|
||||||
@ -138,23 +138,24 @@ fn maybe_schedule_healer_for_storage(
|
|||||||
replication_lag_db_conns.push(conn_builder.build_read_only());
|
replication_lag_db_conns.push(conn_builder.build_read_only());
|
||||||
}
|
}
|
||||||
|
|
||||||
let heal = blobstores.and_then(move |blobstores| {
|
let heal = blobstores.and_then(
|
||||||
let repo_healer = Healer::new(
|
move |blobstores: HashMap<_, Arc<dyn Blobstore + 'static>>| {
|
||||||
logger.clone(),
|
let repo_healer = Healer::new(
|
||||||
blobstore_sync_queue_limit,
|
logger.clone(),
|
||||||
rate_limiter,
|
blobstore_sync_queue_limit,
|
||||||
sync_queue,
|
rate_limiter,
|
||||||
Arc::new(blobstores),
|
sync_queue,
|
||||||
);
|
Arc::new(blobstores),
|
||||||
|
);
|
||||||
|
|
||||||
if dry_run {
|
if dry_run {
|
||||||
// TODO(luk) use a proper context here and put the logger inside of it
|
let ctx = CoreContext::new_with_logger(logger);
|
||||||
let ctx = CoreContext::test_mock();
|
repo_healer.heal(ctx).boxify()
|
||||||
repo_healer.heal(ctx).boxify()
|
} else {
|
||||||
} else {
|
schedule_everlasting_healing(logger, repo_healer, replication_lag_db_conns)
|
||||||
schedule_everlasting_healing(logger, repo_healer, replication_lag_db_conns)
|
}
|
||||||
}
|
},
|
||||||
});
|
);
|
||||||
Ok(myrouter::wait_for_myrouter(myrouter_port, db_address)
|
Ok(myrouter::wait_for_myrouter(myrouter_port, db_address)
|
||||||
.and_then(|_| heal)
|
.and_then(|_| heal)
|
||||||
.boxify())
|
.boxify())
|
||||||
@ -168,8 +169,7 @@ fn schedule_everlasting_healing(
|
|||||||
let replication_lag_db_conns = Arc::new(replication_lag_db_conns);
|
let replication_lag_db_conns = Arc::new(replication_lag_db_conns);
|
||||||
|
|
||||||
let fut = loop_fn((), move |()| {
|
let fut = loop_fn((), move |()| {
|
||||||
// TODO(luk) use a proper context here and put the logger inside of it
|
let ctx = CoreContext::new_with_logger(logger.clone());
|
||||||
let ctx = CoreContext::test_mock();
|
|
||||||
|
|
||||||
cloned!(logger, replication_lag_db_conns);
|
cloned!(logger, replication_lag_db_conns);
|
||||||
repo_healer.heal(ctx).and_then(move |()| {
|
repo_healer.heal(ctx).and_then(move |()| {
|
||||||
@ -236,7 +236,7 @@ fn setup_app<'a, 'b>() -> App<'a, 'b> {
|
|||||||
--sync-queue-limit=[LIMIT] 'set limit for how many queue entries to process'
|
--sync-queue-limit=[LIMIT] 'set limit for how many queue entries to process'
|
||||||
--dry-run 'performs a single healing and prints what would it do without doing it'
|
--dry-run 'performs a single healing and prints what would it do without doing it'
|
||||||
--db-regions=[REGIONS] 'comma-separated list of db regions where db replication lag is monitored'
|
--db-regions=[REGIONS] 'comma-separated list of db regions where db replication lag is monitored'
|
||||||
--storage-id=[STORAGE_ID] 'id of storage to be healed'
|
--storage-id=[STORAGE_ID] 'id of storage to be healed, e.g. manifold_xdb_multiplex'
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user