revisionstore: record remote fetches that match a pattern

Summary:
We want to be able to record when fetches to certain paths happen.
Let's add recording infrastructure to the new ReportingRemoteDataStore.

A future diff will make the set of seen paths accessible from Python for Scuba logging.

Reviewed By: xavierd

Differential Revision: D23462574

fbshipit-source-id: 5d749f2429e26e8e7fe4fb5adc29140b4309eac9
This commit is contained in:
Durham Goode 2020-09-04 14:53:03 -07:00 committed by Facebook GitHub Bot
parent 84cbc26b1e
commit 9772ab1718
3 changed files with 60 additions and 6 deletions

View File

@ -31,6 +31,7 @@ mpatch = { path = "../mpatch" }
lfs_protocol = { path = "../../../mononoke/lfs_protocol" }
parking_lot = "0.9"
rand = "0.7"
regex = "1"
revisionstore_types = { path = "types" }
serde = "1.0.84"
serde_derive = "1.0.84"

View File

@ -12,6 +12,7 @@ use std::{
use anyhow::{format_err, Result};
use bytes::Bytes;
use regex::Regex;
use tracing::info_span;
use configparser::{
@ -397,7 +398,13 @@ impl<'a> ContentStoreBuilder<'a> {
}
let remotestores: Box<dyn RemoteDataStore> = Box::new(remotestores);
let remotestores = Arc::new(ReportingRemoteDataStore::new(remotestores));
let logging_regex = self
.config
.get_opt::<String>("remotefilelog", "undesiredfileregex")?
.map(|s| Regex::new(&s))
.transpose()?;
let remotestores =
Arc::new(ReportingRemoteDataStore::new(remotestores, logging_regex));
datastore.add(remotestores.clone());
Some(remotestores)
} else {

View File

@ -15,6 +15,7 @@ use std::{
use anyhow::{bail, Result};
use bytes::Bytes;
use parking_lot::Mutex;
use regex::Regex;
use serde_derive::{Deserialize, Serialize};
use edenapi_types::{FileEntry, TreeEntry};
@ -205,13 +206,15 @@ pub fn strip_metadata(data: &Bytes) -> Result<(Bytes, Option<Key>)> {
/// A remote data store wrapper that records which requested paths match an
/// optional regex filter, then forwards the request to the wrapped store.
pub struct ReportingRemoteDataStore {
/// The wrapped remote store that every call is delegated to.
store: Box<dyn RemoteDataStore>,
/// Paths matching this pattern are recorded; when `None`, nothing is recorded.
filter: Option<Regex>,
/// Paths that matched `filter` and have not yet been drained.
seen: Mutex<HashSet<RepoPathBuf>>,
}
impl ReportingRemoteDataStore {
pub fn new(store: Box<dyn RemoteDataStore>) -> Self {
/// Wraps `store`, recording requested paths that match `filter`.
///
/// When `filter` is `None`, no paths are recorded. The set of seen paths
/// starts out empty.
pub fn new(store: Box<dyn RemoteDataStore>, filter: Option<Regex>) -> Self {
    let seen = Mutex::new(HashSet::new());
    Self {
        store,
        filter,
        seen,
    }
}
@ -220,13 +223,56 @@ impl ReportingRemoteDataStore {
let mut seen = self.seen.lock();
std::mem::take(&mut *seen)
}
fn report_keys(&self, keys: &[StoreKey]) {
if let Some(filter) = &self.filter {
let mut matches = Vec::new();
use crate::StoreKey::*;
for path in keys
.iter()
.filter_map(|k| match k {
HgId(k) => Some(&k.path),
Content(_, Some(k)) => Some(&k.path),
_ => None,
})
.filter(|p| filter.is_match(p.as_str()))
{
matches.push(path.clone());
}
if !matches.is_empty() {
let mut seen = self.seen.lock();
seen.extend(matches.into_iter());
}
}
}
}
impl Deref for ReportingRemoteDataStore {
type Target = dyn RemoteDataStore;
/// Local-store queries are forwarded to the wrapped store without recording.
impl LocalStore for ReportingRemoteDataStore {
    /// Delegates to the wrapped store's `get_missing` and returns its result
    /// unchanged; no paths are reported for this call.
    fn get_missing(&self, keys: &[StoreKey]) -> Result<Vec<StoreKey>> {
        self.store.get_missing(keys)
    }
}
fn deref(&self) -> &Self::Target {
&self.store
/// Data reads report the requested key first, then delegate to the wrapped store.
impl HgIdDataStore for ReportingRemoteDataStore {
    /// Reports `key` via `report_keys`, then forwards the read to the wrapped
    /// store and returns its result unchanged.
    fn get(&self, key: StoreKey) -> Result<StoreResult<Vec<u8>>> {
        // Borrow the key as a one-element slice rather than cloning it; the
        // borrow ends before `key` is moved into the wrapped store.
        self.report_keys(std::slice::from_ref(&key));
        self.store.get(key)
    }

    /// Reports `key` via `report_keys`, then forwards the metadata read to the
    /// wrapped store and returns its result unchanged.
    fn get_meta(&self, key: StoreKey) -> Result<StoreResult<Metadata>> {
        // Same borrow-instead-of-clone approach as in `get`.
        self.report_keys(std::slice::from_ref(&key));
        self.store.get_meta(key)
    }
}
/// Remote operations are delegated to the wrapped store; only `prefetch`
/// reports the requested keys.
impl RemoteDataStore for ReportingRemoteDataStore {
    /// Reports `keys` via `report_keys`, then forwards the prefetch to the
    /// wrapped store and returns its result unchanged.
    fn prefetch(&self, keys: &[StoreKey]) -> Result<Vec<StoreKey>> {
        self.report_keys(keys);
        self.store.prefetch(keys)
    }

    /// Forwards the upload to the wrapped store. Uploads are not reported.
    fn upload(&self, keys: &[StoreKey]) -> Result<Vec<StoreKey>> {
        self.store.upload(keys)
    }
}