sampling: move sampling config to separate crate

Summary: I want to expose the sampling file to Python, so as a first step let's move the sampling config into a crate that is not tracing-related.

Reviewed By: zzl0

Differential Revision: D44645801

fbshipit-source-id: 14da917acdc3529e17502d81846229e3ae8854e2
This commit is contained in:
Muir Manders 2023-04-04 17:57:02 -07:00 committed by Facebook GitHub Bot
parent 08504be6cb
commit 7f630ef746
7 changed files with 98 additions and 69 deletions

View File

@ -100,6 +100,7 @@ members = [
"lib/revlogindex",
"lib/revsets",
"lib/runlog",
"lib/sampling",
"lib/sparse",
"lib/spawn-ext",
"lib/status",

View File

@ -59,6 +59,7 @@ repo = { version = "0.1.0", path = "../repo" }
revisionstore = { version = "0.1.0", path = "../revisionstore" }
revsets = { version = "0.1.0", path = "../revsets" }
runlog = { version = "0.1.0", path = "../runlog" }
sampling = { version = "0.1.0", path = "../sampling" }
serde = { version = "1.0.136", features = ["derive", "rc"] }
serde_json = { version = "1.0.79", features = ["float_roundtrip", "unbounded_depth"] }
status = { version = "0.1.0", path = "../status" }

View File

@ -40,11 +40,11 @@ use once_cell::sync::OnceCell;
use parking_lot::Mutex;
use progress_model::Registry;
use repo::repo::Repo;
use sampling::SamplingConfig;
use tracing::dispatcher;
use tracing::dispatcher::Dispatch;
use tracing::Level;
use tracing_collector::TracingData;
use tracing_sampler::SamplingConfig;
use tracing_sampler::SamplingLayer;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::fmt::Layer as FmtLayer;

View File

@ -0,0 +1,10 @@
# @generated by autocargo
[package]
name = "sampling"
version = "0.1.0"
edition = "2021"
[dependencies]
configmodel = { version = "0.1.0", path = "../config/model" }
parking_lot = { version = "0.11.2", features = ["send_guard"] }

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::collections::HashMap;
use std::fs::File;
use std::fs::OpenOptions;
use std::path::PathBuf;
use parking_lot::Mutex;
use parking_lot::MutexGuard;
/// Sampling settings: a key-to-category map plus the open output file.
///
/// Instances are created by [`SamplingConfig::new`], which returns `None`
/// when no sampling keys are configured or no output file could be opened.
#[derive(Debug)]
pub struct SamplingConfig {
/// Maps each `key.<name>` entry of the `sampling` config section (stored
/// with the `key.` prefix stripped) to its configured value.
keys: HashMap<String, String>,
/// Output file opened in append mode, behind a mutex.
file: Mutex<File>,
}
impl SamplingConfig {
    /// Builds the sampling configuration from the `sampling` config section.
    ///
    /// Every `sampling.key.<name>` entry contributes one mapping from
    /// `<name>` to the entry's value. The output file is chosen by
    /// `sampling_output_file` and opened for appending, being created if it
    /// does not exist yet.
    ///
    /// Returns `None` when no `key.*` entries are configured, when no usable
    /// output path is found, or when the output file cannot be opened.
    pub fn new(config: &dyn configmodel::Config) -> Option<Self> {
        let keys: HashMap<String, String> = config
            .keys("sampling")
            .into_iter()
            .filter_map(|name| {
                let key = name.strip_prefix("key.")?;
                let category = config.get("sampling", &name)?;
                Some((key.to_string(), category.to_string()))
            })
            .collect();
        if keys.is_empty() {
            return None;
        }

        let output_file = sampling_output_file(config)?;
        // `append(true)` already implies write access, so no separate
        // `write(true)` is needed. Open failures are deliberately mapped to
        // `None`: sampling is simply not set up in that case.
        let file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(output_file)
            .ok()?;

        Some(Self {
            keys,
            file: Mutex::new(file),
        })
    }

    /// Returns the category configured for `key`, or `None` if `key` has no
    /// `sampling.key.<key>` entry.
    pub fn category(&self, key: &str) -> Option<&str> {
        self.keys.get(key).map(String::as_str)
    }

    /// Locks the output file and returns the guard; the lock is released
    /// when the guard is dropped.
    pub fn file(&self) -> MutexGuard<'_, File> {
        self.file.lock()
    }
}
/// Picks the path of the sampling output file.
///
/// Candidates are tried in priority order: first the `SCM_SAMPLING_FILEPATH`
/// environment variable, then the `sampling.filepath` config value. The first
/// candidate whose parent directory exists is returned; `None` is returned
/// when neither candidate qualifies.
fn sampling_output_file(config: &dyn configmodel::Config) -> Option<PathBuf> {
    let from_env = std::env::var("SCM_SAMPLING_FILEPATH")
        .ok()
        .map(PathBuf::from);
    let from_config = config
        .get("sampling", "filepath")
        .map(|path| PathBuf::from(path.to_string()));

    from_env.into_iter().chain(from_config).find(|path| {
        path.parent().map_or(false, |dir| {
            // A bare relative file name such as `samples.json` has the empty
            // path as its parent. The empty path never "exists", yet it
            // denotes the current directory, so accept it explicitly.
            dir.as_os_str().is_empty() || dir.exists()
        })
    })
}

View File

@ -6,9 +6,8 @@ version = "0.1.0"
edition = "2021"
[dependencies]
configmodel = { version = "0.1.0", path = "../config/model" }
once_cell = "1.12"
parking_lot = { version = "0.11.2", features = ["send_guard"] }
sampling = { version = "0.1.0", path = "../sampling" }
serde = { version = "1.0.136", features = ["derive", "rc"] }
serde_json = { version = "1.0.79", features = ["float_roundtrip", "unbounded_depth"] }
tracing = "0.1.35"

View File

@ -5,15 +5,11 @@
* GNU General Public License version 2.
*/
use std::collections::HashMap;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::PathBuf;
use std::sync::Arc;
use once_cell::sync::OnceCell;
use parking_lot::Mutex;
use sampling::SamplingConfig;
use serde::ser::SerializeMap;
use serde::ser::Serializer;
use serde_json::Serializer as JsonSerializer;
@ -49,13 +45,13 @@ impl<S: Subscriber> Layer<S> for SamplingLayer {
None => return,
};
let category = match config.keys.get(event.metadata().target()) {
let category = match config.category(event.metadata().target()) {
Some(v) => v,
None => return,
};
let serialize = || -> std::io::Result<()> {
let mut file = config.file.lock();
let mut file = config.file();
let mut serializer = JsonSerializer::new(&*file);
let mut serializer = serializer.serialize_map(None)?;
@ -72,48 +68,6 @@ impl<S: Subscriber> Layer<S> for SamplingLayer {
}
}
/// Sampling settings: a key-to-category map plus the open output file.
///
/// Instances are created by [`SamplingConfig::new`], which returns `None`
/// when no sampling keys are configured or no output file could be opened.
#[derive(Debug)]
pub struct SamplingConfig {
/// Maps each `key.<name>` entry of the `sampling` config section (stored
/// with the `key.` prefix stripped) to its configured value.
keys: HashMap<String, String>,
/// Output file opened in append mode, behind a mutex.
file: Mutex<File>,
}
impl SamplingConfig {
    /// Reads the `sampling` config section and opens the sampling output file.
    ///
    /// Each `key.<name>` entry becomes a `<name>` -> value mapping. Yields
    /// `None` if there are no such entries, if `sampling_output_file` finds no
    /// usable path, or if opening the file fails.
    pub fn new(config: &dyn configmodel::Config) -> Option<Self> {
        let mut keys: HashMap<String, String> = HashMap::new();
        for name in config.keys("sampling") {
            // Only entries of the form `key.<name>` describe sampling keys.
            let key = match name.strip_prefix("key.") {
                Some(stripped) => stripped,
                None => continue,
            };
            if let Some(value) = config.get("sampling", &name) {
                keys.insert(key.to_string(), value.to_string());
            }
        }
        if keys.is_empty() {
            return None;
        }

        let path = sampling_output_file(config)?;
        OpenOptions::new()
            .create(true)
            .append(true)
            .write(true)
            .open(path)
            .ok()
            .map(|file| Self {
                keys,
                file: Mutex::new(file),
            })
    }
}
/// Filter that holds a shared handle to the sampling configuration.
struct SamplingFilter {
// Shared cell that may or may not hold a `SamplingConfig` yet.
// NOTE(review): presumably the same cell the sampling layer reads — confirm.
config: Arc<OnceCell<SamplingConfig>>,
}
@ -125,7 +79,7 @@ impl SamplingFilter {
None => return false,
};
config.keys.get(meta.target()).is_some()
config.category(meta.target()).is_some()
}
}
@ -143,22 +97,6 @@ impl<S: Subscriber> Filter<S> for SamplingFilter {
}
}
/// Chooses where sampling output is written.
///
/// The `SCM_SAMPLING_FILEPATH` environment variable is considered first and
/// the `sampling.filepath` config value second; the first of them whose
/// parent directory exists wins. Returns `None` if neither qualifies.
fn sampling_output_file(config: &dyn configmodel::Config) -> Option<PathBuf> {
    let env_path = std::env::var("SCM_SAMPLING_FILEPATH")
        .ok()
        .map(PathBuf::from);
    let config_path = config
        .get("sampling", "filepath")
        .map(|value| PathBuf::from(value.to_string()));

    env_path
        .into_iter()
        .chain(config_path)
        .find(|candidate| match candidate.parent() {
            Some(dir) => dir.exists(),
            None => false,
        })
}
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;