mononoke/lfs_server: allow enabling rate limits probabilistically

Summary:
If we exceed a rate limit, we probably don't want to just drop 100% of traffic.
This would create a sawtooth pattern where we allow a bunch of traffic, update
our counters, drop a bunch of traffic, update our counters again, allow a bunch
of traffic, etc.

To fix this, let's make limits probabilistic. This lets us say "beyond X GB/s,
drop Y% of traffic", which is closer to a sane rate limit.

It might also make sense to eventually change this to use ratelim. Initially,
we didn't do this because we needed our rate limiting decisions to be local to
a single host (because different hosts served different traffic), but now that
we spread the load for popular blobs across the whole tier, we should be able
to just delegate to ratelim.

For now, however, let's finish this bit of functionality so we can turn it
on.

The corresponding Configerator change is here: D23472683

Reviewed By: aslpavel

Differential Revision: D23472945

fbshipit-source-id: f7d985fded3cdbbcea3bc8cef405224ff5426a25
This commit is contained in:
Thomas Orozco 2020-09-02 11:00:32 -07:00 committed by Facebook GitHub Bot
parent 32609a44ac
commit b8e197fdb4
3 changed files with 63 additions and 7 deletions

View File

@ -21,6 +21,7 @@ pub struct RawLimit {
pub sleep_ms: i64,
pub max_jitter_ms: i64,
pub client_identities: Vec<String>,
pub probability_pct: i64,
}
/// Struct representing actual config data.
@ -45,10 +46,10 @@ pub struct Limit {
client_identities: Vec<MononokeIdentity>,
}
impl TryFrom<&RawLimit> for Limit {
impl TryFrom<RawLimit> for Limit {
type Error = anyhow::Error;
fn try_from(value: &RawLimit) -> Result<Self, Self::Error> {
fn try_from(value: RawLimit) -> Result<Self, Self::Error> {
let client_identities = value
.client_identities
.iter()
@ -56,7 +57,7 @@ impl TryFrom<&RawLimit> for Limit {
.collect::<Result<Vec<_>, _>>()?;
Ok(Self {
raw_limit: value.clone(),
raw_limit: value,
client_identities,
})
}
@ -77,6 +78,7 @@ impl<'de> Deserialize<'de> for ServerConfig {
let try_throttle_limits = raw_server_config
.throttle_limits
.iter()
.cloned()
.map(Limit::try_from)
.collect::<Result<Vec<_>, _>>();
@ -163,4 +165,7 @@ impl Limit {
/// Returns an owned copy of the client identities stored on this limit.
///
/// The whole vector is copied on every call.
pub fn client_identities(&self) -> Vec<MononokeIdentity> {
    self.client_identities.to_vec()
}
/// Returns the `probability_pct` value carried by the underlying raw limit.
///
/// The value is returned as-is; no range check is performed here.
pub fn probability_pct(&self) -> i64 {
self.raw_limit.probability_pct
}
}

View File

@ -7,7 +7,7 @@
use cached_config::ConfigHandle;
use fbinit::FacebookInit;
use futures::future::{self, FutureExt};
use futures::future::FutureExt;
use gotham::{handler::HandlerFuture, middleware::Middleware, state::State};
use gotham_derive::NewMiddleware;
use gotham_ext::error::HttpError;
@ -64,6 +64,10 @@ impl Middleware for ThrottleMiddleware {
continue;
}
if !limit_applies_probabilistically(&limit) {
continue;
}
if let Some(err) = is_limit_exceeded(self.fb, &limit.counter(), limit.limit()) {
let err = HttpError::e429(err);
@ -77,9 +81,15 @@ impl Middleware for ThrottleMiddleware {
let total_sleep_ms = sleep_ms + jitter;
return tokio::time::delay_for(Duration::from_millis(total_sleep_ms))
.then(move |()| future::ready(http_error_to_handler_error(err, state)))
.boxed();
let res = async move {
if total_sleep_ms > 0 {
tokio::time::delay_for(Duration::from_millis(total_sleep_ms)).await;
}
http_error_to_handler_error(err, state)
}
.boxed();
return res;
}
}
@ -112,3 +122,43 @@ fn limit_applies_to_client(limit: &Limit, client_identity: &Option<&MononokeIden
.any(|presented_id| presented_id == configured_id)
})
}
/// Decides at random whether `limit` should be considered for this request.
///
/// A number is drawn from the range `0..100` (upper bound excluded) and the
/// limit applies when that number is strictly below `limit.probability_pct()`.
/// A value of 0 therefore never applies and a value of 100 always applies.
fn limit_applies_probabilistically(limit: &Limit) -> bool {
    // The draw happens on every call, whatever the configured percentage is.
    let roll: i64 = rand::thread_rng().gen_range(0, 100);
    roll < limit.probability_pct()
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::config::RawLimit;

    /// Checks the two deterministic endpoints of the probabilistic check:
    /// 0 must never apply and 100 must always apply.
    #[test]
    fn test_limit_applies_probabilistically() {
        // Builds a `Limit` in which only the probability varies; every other
        // field is left empty / zero.
        let limit_with_pct = |probability_pct: i64| -> Limit {
            RawLimit {
                counter: "".to_string(),
                limit: 0,
                sleep_ms: 0,
                max_jitter_ms: 0,
                client_identities: vec![],
                probability_pct,
            }
            .try_into()
            .unwrap()
        };

        let never = limit_with_pct(0);
        assert!(!limit_applies_probabilistically(&never));

        let always = limit_with_pct(100);
        assert!(limit_applies_probabilistically(&always));
    }
}

View File

@ -21,6 +21,7 @@
> "limit": 10,
> "sleep_ms": 1000,
> "max_jitter_ms": 100,
> "probability_pct": 100,
> "client_identities": [
> "USER:myusername0"
> ]