lfs: add some error code counters

Summary:
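Add some counters to track LFS transient/fatal error response codes:
- "lfs.transient_error.{method}.{code}" counts HTTP error codes we saw for a
  request that ended up succeeding after retries
- "lfs.fatal_error.{method}.{code}" counts HTTP error codes we saw for a request
  that ended up failing ("lfs.fatal_error.{method}.other" is counted instead
  when the request failed without any HTTP error code having been recorded)
- "lfs.success.{method}" counts successful queries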

Reviewed By: quark-zju

Differential Revision: D33962157

fbshipit-source-id: 901625eb72cac2eba57f4b2424a778f4f8ac1779
This commit is contained in:
Muir Manders 2022-02-11 09:38:52 -08:00 committed by Facebook GitHub Bot
parent 2f90e6ddb3
commit 1884a15a19
2 changed files with 27 additions and 2 deletions

View File

@ -21,6 +21,7 @@ edenapi_types = { version = "0.1.0", path = "../edenapi/types" }
futures = { version = "0.3.13", features = ["async-await", "compat"] }
hex = "0.4.3"
hg-http = { version = "0.1.0", path = "../hg-http" }
hg-metrics = { path = "../hg-metrics" }
hgtime = { version = "0.1.0", path = "../hgtime" }
http = "0.2"
http-client = { version = "0.1.0", path = "../http-client" }

View File

@ -43,7 +43,6 @@ use futures::stream::StreamExt;
use futures::stream::TryStreamExt;
use hg_http::http_client;
use hg_http::http_config;
use http::header::HeaderMap;
use http::status::StatusCode;
use http_client::Encoding;
use http_client::HttpClient;
@ -1128,6 +1127,8 @@ impl LfsRemoteInner {
let mut rng = thread_rng();
let mut attempt = 0;
let mut seen_error_codes = HashSet::new();
loop {
attempt += 1;
@ -1214,12 +1215,23 @@ impl LfsRemoteInner {
.await;
let error = match res {
Ok(res) => return Ok(res),
Ok(res) => {
for code in seen_error_codes {
// Record that we saw this error code, but it went away on retry.
hg_metrics::increment_counter(
format!("lfs.transient_error.{}.{}", method, code),
1,
);
}
hg_metrics::increment_counter(format!("lfs.success.{}", method), 1);
return Ok(res);
}
Err(error) => error,
};
let retry_strategy = match &error {
TransferError::HttpStatus(status, _) => {
seen_error_codes.insert(*status);
RetryStrategy::from_http_status(*status)
}
TransferError::HttpClientError(http_error) => {
@ -1249,6 +1261,18 @@ impl LfsRemoteInner {
continue;
}
if seen_error_codes.is_empty() {
hg_metrics::increment_counter(format!("lfs.fatal_error.{}.other", method), 1);
}
for code in seen_error_codes {
// Record that we saw this error code and ended up failing.
hg_metrics::increment_counter(
format!("lfs.fatal_error.{}.{}", method, code),
1,
);
}
return Err(FetchError { url, method, error });
}
}