undesired fetches: regex-based reporting

Summary:
We want to be able to report on more than just one path prefix, so let's add regex-based reporting. To make deployment easier, let's keep both options for now and remove the prefix-based one later.

Note: this diff also changes how an absent `undesired_path_prefix_to_log` is treated. Previously, if `undesired_path_prefix_to_log` was absent but `"undesired_path_repo_name_to_log": "fbsource"` was set, every path would be reported. Now no path is reported in that case, which I think is saner behavior. If we ever do want to report every path, we can just set the regex to `.*`.

Reviewed By: StanislavGlebik

Differential Revision: D23447800

fbshipit-source-id: 059109b44256f5703843625b7ab725a243a13056
This commit is contained in:
Kostia Balytskyi 2020-09-01 11:59:18 -07:00 committed by Facebook GitHub Bot
parent e21bc74aeb
commit e7ddc6cc13
4 changed files with 41 additions and 2 deletions

View File

@ -19,6 +19,7 @@ use bytes::Bytes;
use lazy_static::lazy_static;
use quickcheck::{Arbitrary, Gen};
use rand::{seq::SliceRandom, Rng};
use regex::Regex;
use serde_derive::{Deserialize, Serialize};
use crate::bonsai_changeset::BonsaiChangeset;
@ -609,6 +610,11 @@ impl MPath {
current: Some(self),
}
}
/// Returns `true` when `re` matches the textual (`Display`) form of this path.
///
/// The path is rendered into a temporary `String` on every call, and that
/// string is handed to `Regex::is_match`. Per the `regex` crate, `is_match`
/// succeeds on a match anywhere in the haystack, so callers that need a
/// whole-path match must anchor the pattern themselves (`^...$`).
pub fn matches_regex(&self, re: &Regex) -> bool {
    let rendered = self.to_string();
    re.is_match(rendered.as_str())
}
}
impl AsRef<[MPathElement]> for MPath {

View File

@ -52,6 +52,7 @@ lazy_static = "1.0"
maplit = "1.0"
percent-encoding = "2.1"
rand = { version = "0.7", features = ["small_rng"] }
regex = "1.3.7"
serde_json = "1.0"
slog = { version = "2.5", features = ["max_level_debug"] }
thiserror = "1.0"

View File

@ -59,6 +59,7 @@ use mercurial_types::{
use metaconfig_types::{RepoClientKnobs, RepoReadOnly};
use mononoke_repo::{MononokeRepo, SqlStreamingCloneConfig};
use rand::{self, Rng};
use regex::Regex;
use remotefilelog::{
create_getpack_v1_blob, create_getpack_v2_blob, get_unordered_file_history_for_multiple_nodes,
GetpackBlobInfo,
@ -66,7 +67,7 @@ use remotefilelog::{
use revisionstore_types::Metadata;
use scuba_ext::ScubaSampleBuilderExt;
use serde_json::{self, json};
use slog::{debug, info, o};
use slog::{debug, error, info, o};
use stats::prelude::*;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::convert::TryInto;
@ -288,6 +289,7 @@ struct UndesiredPathLogger {
ctx: CoreContext,
repo_needs_logging: bool,
path_prefix_to_log: Option<MPath>,
path_regex_to_log: Option<Regex>,
}
impl UndesiredPathLogger {
@ -302,10 +304,29 @@ impl UndesiredPathLogger {
None
};
let path_regex_to_log = if repo_needs_logging
&& !tunables.get_undesired_path_regex_to_log().is_empty()
{
Some(
Regex::new(tunables.get_undesired_path_regex_to_log().as_str()).map_err(|e| {
error!(
ctx.logger(),
"Error initializing undesired path regex for {}: {}",
repo.name(),
e
);
e
})?,
)
} else {
None
};
Ok(Self {
ctx,
repo_needs_logging,
path_prefix_to_log,
path_regex_to_log,
})
}
@ -341,7 +362,17 @@ impl UndesiredPathLogger {
fn should_log(&self, path: Option<&MPath>) -> bool {
if self.repo_needs_logging {
MPath::is_prefix_of_opt(self.path_prefix_to_log.as_ref(), MPath::iter_opt(path))
let op1 = match self.path_prefix_to_log.as_ref() {
None => false,
Some(prefix) => prefix.is_prefix_of(MPath::iter_opt(path)),
};
let op2 = match (path, self.path_regex_to_log.as_ref()) {
(Some(path), Some(re)) => path.matches_regex(re),
_ => false,
};
op1 || op2
} else {
false
}

View File

@ -71,6 +71,7 @@ pub struct MononokeTunables {
// in a particular repo
undesired_path_repo_name_to_log: TunableString,
undesired_path_prefix_to_log: TunableString,
undesired_path_regex_to_log: TunableString,
pushrebase_disable_rebased_commit_validation: AtomicBool,
filenodes_disabled: AtomicBool,
skiplist_max_skips_without_yield: AtomicI64,