mirror of
https://github.com/facebook/sapling.git
synced 2024-10-04 22:07:44 +03:00
Add process name exclusion and log reporting for NFS crawling detection
Summary: * Added new helper readProcessIdsForPath to common/utils to obtain a list of pids that have open files/paths in the specified path. * Integrated readProcessIdsForPath into the NFS heavy fetch detection logic - enumerating over mount points and collecting the pids for each that have open files therein. Reviewed By: MichaelCuevas Differential Revision: D47276991 fbshipit-source-id: bfee34718a23ab5aca7ebff32577374dadd35af9
This commit is contained in:
parent
21893e2149
commit
53d7ac31cf
@ -948,6 +948,14 @@ class EdenConfig : private ConfigSettingManager {
|
||||
100,
|
||||
this};
|
||||
|
||||
/**
|
||||
* Set of simple process names that NFS crawl detection skips when reporting
* the processes found with open files on a mount.
|
||||
*/
|
||||
ConfigSetting<std::unordered_set<std::string>> nfsCrawlExcludedProcessNames{
|
||||
"experimental:nfs-crawl-excluded-process-names",
|
||||
{},
|
||||
this};
|
||||
|
||||
/**
|
||||
* Controls whether EdenFS uses EdenApi to import data from remote.
|
||||
*
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <atomic>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@ -757,7 +758,7 @@ void EdenServer::updatePeriodicTaskIntervals(const EdenConfig& config) {
|
||||
config.enableNfsCrawlDetection.getValue()) {
|
||||
auto interval = config.nfsCrawlInterval.getValue();
|
||||
XLOGF(
|
||||
INFO,
|
||||
DBG2,
|
||||
"NFS crawl detection enabled. Using interval = {}ns",
|
||||
interval.count());
|
||||
detectNfsCrawlTask_.updateInterval(
|
||||
@ -2346,38 +2347,63 @@ void EdenServer::detectNfsCrawl() {
|
||||
.value_or(0);
|
||||
if (readCount > readThreshold || readDirCount > readDirThreshold) {
|
||||
XLOGF(
|
||||
INFO,
|
||||
"Nfs crawl detected, initiating process discovery and attribution: "
|
||||
"[nfs.read_us.count.60 = {} > {} or nfs.readdir[plus]_us.count.60 = {} > {}.",
|
||||
DBG2,
|
||||
"NFS crawl detected, initiating process discovery and attribution: "
|
||||
"[nfs.read_us.count.60 = {} > {} or nfs.readdir[plus]_us.count.60 = {} > {}]",
|
||||
readCount,
|
||||
readThreshold,
|
||||
readDirCount,
|
||||
readDirThreshold);
|
||||
|
||||
// Get list of excluded process names
|
||||
auto exclusions = edenConfig->nfsCrawlExcludedProcessNames.getValue();
|
||||
|
||||
// Iterate over each mount
|
||||
auto mountPoints = getMountPoints();
|
||||
for (auto& mountPointHandle : mountPoints) {
|
||||
folly::via(
|
||||
getServerState()->getThreadPool().get(), [this, mountPointHandle]() {
|
||||
auto& mountPoint = mountPointHandle.getEdenMount();
|
||||
if (mountPoint.isNfsdChannel()) {
|
||||
auto pids =
|
||||
proc_util::readProcessIdsForPath(mountPoint.getPath());
|
||||
XLOGF(
|
||||
INFO,
|
||||
"NFS crawl detection found {} processes opening files in mount point: {}",
|
||||
pids.size(),
|
||||
mountPoint.getPath());
|
||||
if (mountPointHandle.getEdenMount().isNfsdChannel()) {
|
||||
folly::via(
|
||||
getServerState()->getThreadPool().get(),
|
||||
[serverState = serverState_, mountPointHandle, exclusions]() {
|
||||
const auto& mount = mountPointHandle.getEdenMount();
|
||||
|
||||
// Get list of pids that have open files/paths on the mount
|
||||
auto pids = proc_util::readProcessIdsForPath(mount.getPath());
|
||||
for (auto pid : pids) {
|
||||
auto processNameHandle =
|
||||
serverState_->getProcessNameCache()->lookup(pid);
|
||||
XLOGF(
|
||||
INFO,
|
||||
"NFS detected process: {} ({})",
|
||||
processNameHandle.get(),
|
||||
pid);
|
||||
auto simpleName = proc_util::readProcessSimpleName(pid);
|
||||
if (simpleName.has_value()) {
|
||||
if (exclusions.find(simpleName.value()) == exclusions.end()) {
|
||||
// Log process hierarchy
|
||||
auto hierarchy = proc_util::getProcessHierarchy(
|
||||
serverState->getProcessNameCache(), pid);
|
||||
XCHECK(
|
||||
!hierarchy.empty(),
|
||||
"proc_util::getProcessHierarchy returned an empty list.");
|
||||
auto [_pid, sname, pname] = std::move(hierarchy.top());
|
||||
hierarchy.pop();
|
||||
std::string output = fmt::format(
|
||||
"NFS crawl detection found process with open files in mount point: {}\n {} ({}): {}",
|
||||
mount.getPath(),
|
||||
sname,
|
||||
_pid,
|
||||
pname);
|
||||
while (!hierarchy.empty()) {
|
||||
fmt::format_to(std::back_inserter(output), "\n");
|
||||
auto [_pid, sname, pname] = std::move(hierarchy.top());
|
||||
hierarchy.pop();
|
||||
fmt::format_to(
|
||||
std::back_inserter(output),
|
||||
" ->{}({}): {}",
|
||||
sname,
|
||||
_pid,
|
||||
pname);
|
||||
}
|
||||
XLOGF(DBG2, output);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include <array>
|
||||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/Conv.h>
|
||||
@ -23,6 +24,7 @@
|
||||
#include <mach/mach_init.h> // @manual
|
||||
#include <mach/task.h> // @manual
|
||||
#include <mach/task_info.h> // @manual
|
||||
#include <sys/proc_info.h> // @manual
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
@ -270,7 +272,7 @@ ProcessList readProcessIdsForPath(FOLLY_MAYBE_UNUSED const AbsolutePath& path) {
|
||||
// If anything went wrong, resize buffer to 0 size.
|
||||
if (pids_size < 0) {
|
||||
XLOGF(
|
||||
WARNING,
|
||||
INFO,
|
||||
"proc_listpidspath failed: {} ()",
|
||||
folly::errnoStr(pids_size),
|
||||
pids_size);
|
||||
@ -284,4 +286,79 @@ ProcessList readProcessIdsForPath(FOLLY_MAYBE_UNUSED const AbsolutePath& path) {
|
||||
return pids;
|
||||
}
|
||||
|
||||
/**
 * Looks up the simple (short) name of the process identified by `pid` using
 * proc_name().
 *
 * The lookup is only compiled in when __APPLE__ is defined; on every other
 * platform this function always returns std::nullopt.
 *
 * @param pid the process ID to look up.
 * @return the process name, or std::nullopt if proc_name() produced no name
 *         (non-positive return value) or reported a length larger than the
 *         buffer it was given.
 */
std::optional<ProcessSimpleName> readProcessSimpleName(
    FOLLY_MAYBE_UNUSED pid_t pid) {
  std::optional<ProcessSimpleName> simpleName;
#ifdef __APPLE__
  // Max length of process name returned from proc_name
  // https://opensource.apple.com/source/xnu/xnu-1228.0.2/bsd/sys/proc_info.h.auto.html
  constexpr int32_t kBufferLen = 2 * MAXCOMLEN + 1;
  std::array<char, kBufferLen> buffer{};

  const auto ret = proc_name(pid, buffer.data(), kBufferLen);
  if (ret > kBufferLen) {
    // This should never happen.
    XLOGF(INFO, "proc_name return length greater than provided buffer.");
    return std::nullopt;
  }

  if (ret > 0) {
    // `ret` is the number of name bytes written into the buffer.
    simpleName.emplace(buffer.data(), static_cast<size_t>(ret));
  } else {
    // A zero return is the failure case. A negative value is handled the same
    // way so that it can never be used as a (huge, unsigned) length.
    XLOGF(DBG2, "proc_name failed: {} ({})", folly::errnoStr(errno), errno);
  }
#endif

  return simpleName;
}
|
||||
|
||||
/**
 * Returns the parent pid of `pid` as reported by proc_pidinfo().
 *
 * The lookup only exists when __APPLE__ is defined; otherwise the result is
 * always std::nullopt. std::nullopt is also returned when proc_pidinfo()
 * fails, returns an unexpected number of bytes, or reports a parent pid of 0.
 */
std::optional<pid_t> getParentProcessId(FOLLY_MAYBE_UNUSED pid_t pid) {
#ifdef __APPLE__
  // Parent pid lookups are not cached: pids get recycled over time, so a cache
  // would need an invalidation strategy before it could be trusted.
  proc_bsdinfo bsdInfo;
  int32_t expectedSize = sizeof(bsdInfo);
  auto written = proc_pidinfo(
      pid,
      PROC_PIDTBSDINFO,
      true, // find zombies
      &bsdInfo,
      expectedSize);

  if (written == 0) {
    XLOGF(DBG3, "proc_pidinfo failed: {} ({})", folly::errnoStr(errno), errno);
    return std::nullopt;
  }

  if (written != expectedSize) {
    XLOGF(WARN, "proc_pidinfo failed returned an invalid size");
    return std::nullopt;
  }

  if (bsdInfo.pbi_ppid <= 0) {
    XLOGF(WARN, "proc_pidinfo returned an invalid parent pid.");
    return std::nullopt;
  }

  return std::optional<pid_t>{std::in_place, bsdInfo.pbi_ppid};
#else
  return std::nullopt;
#endif
}
|
||||
|
||||
std::stack<std::tuple<pid_t, std::string, ProcessName>> getProcessHierarchy(
|
||||
std::shared_ptr<ProcessNameCache> processNameCache,
|
||||
pid_t pid) {
|
||||
std::stack<std::tuple<pid_t, std::string, ProcessName>> hierarchy;
|
||||
do {
|
||||
auto simpleName = proc_util::readProcessSimpleName(pid);
|
||||
hierarchy.emplace(
|
||||
pid,
|
||||
simpleName.value_or("<unknown>"),
|
||||
processNameCache->lookup(pid).get());
|
||||
pid = proc_util::getParentProcessId(pid).value_or(0);
|
||||
// Exit when reaching the root process (not included).
|
||||
} while (pid > 1);
|
||||
|
||||
return hierarchy;
|
||||
}
|
||||
|
||||
} // namespace facebook::eden::proc_util
|
||||
|
@ -8,9 +8,12 @@
|
||||
#pragma once
|
||||
#include <folly/Range.h>
|
||||
#include <optional>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "eden/common/utils/ProcessName.h"
|
||||
#include "eden/common/utils/ProcessNameCache.h"
|
||||
#include "eden/fs/utils/PathFuncs.h"
|
||||
|
||||
namespace facebook::eden {
|
||||
@ -130,5 +133,31 @@ using ProcessList = std::vector<pid_t>;
|
||||
*/
|
||||
ProcessList readProcessIdsForPath(const AbsolutePath& path);
|
||||
|
||||
/**
|
||||
* Stores a simple process name - just the name of the process. This is in
|
||||
* contrast to ProcessName which stores the process command line minus the
|
||||
* process path.
|
||||
*/
|
||||
using ProcessSimpleName = std::string;
|
||||
|
||||
/**
|
||||
* Fetches the process name for the specified process ID. If the pid is invalid
|
||||
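* or an error occurs while fetching, returns std::nullopt. The lookup is only
* implemented when __APPLE__ is defined; on other platforms this always
* returns std::nullopt.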
|
||||
*/
|
||||
std::optional<ProcessSimpleName> readProcessSimpleName(pid_t pid);
|
||||
|
||||
/**
|
||||
* Get the parent process ID of the specified process ID, if one exists.
|
||||
*/
|
||||
std::optional<pid_t> getParentProcessId(pid_t pid);
|
||||
|
||||
/**
|
||||
* Get the process hierarchy, as a stack of tuples of pid, simple name and
|
||||
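* ProcessName, of the specified process ID. Entries are pushed starting with
* the specified process and continuing through its parents, so the top of the
* stack is the most distant ancestor found. The walk stops before any parent
* whose pid is 1 or less, or whose pid cannot be determined.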
|
||||
*/
|
||||
std::stack<std::tuple<pid_t, std::string, ProcessName>> getProcessHierarchy(
|
||||
std::shared_ptr<ProcessNameCache> processNameCache,
|
||||
pid_t pid);
|
||||
|
||||
} // namespace proc_util
|
||||
} // namespace facebook::eden
|
||||
|
Loading…
Reference in New Issue
Block a user