Add process name exclusion and log reporting for NFS crawling detection

Summary:
* Added new helper readProcessIdsForPath to common/utils to obtain a list of pids that have open files/paths in the specified path.
* Integrated readProcessIdsForPath into the NFS heavy fetch detection logic - enumerating over mount points and collecting the pids for each that have open files therein.

Reviewed By: MichaelCuevas

Differential Revision: D47276991

fbshipit-source-id: bfee34718a23ab5aca7ebff32577374dadd35af9
This commit is contained in:
John Elliott 2023-07-07 19:48:02 -07:00 committed by Facebook GitHub Bot
parent 21893e2149
commit 53d7ac31cf
4 changed files with 165 additions and 25 deletions

View File

@ -948,6 +948,14 @@ class EdenConfig : private ConfigSettingManager {
100,
this};
/**
* Set of process names that NFS crawl detection should ignore. The detection
* code looks up each candidate process's simple name in this set and only
* reports processes whose name is not present. Defaults to an empty set.
*/
ConfigSetting<std::unordered_set<std::string>> nfsCrawlExcludedProcessNames{
"experimental:nfs-crawl-excluded-process-names",
{},
this};
/**
* Controls whether EdenFS uses EdenApi to import data from remote.
*

View File

@ -14,6 +14,7 @@
#include <atomic>
#include <fstream>
#include <functional>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
@ -757,7 +758,7 @@ void EdenServer::updatePeriodicTaskIntervals(const EdenConfig& config) {
config.enableNfsCrawlDetection.getValue()) {
auto interval = config.nfsCrawlInterval.getValue();
XLOGF(
INFO,
DBG2,
"NFS crawl detection enabled. Using interval = {}ns",
interval.count());
detectNfsCrawlTask_.updateInterval(
@ -2346,38 +2347,63 @@ void EdenServer::detectNfsCrawl() {
.value_or(0);
if (readCount > readThreshold || readDirCount > readDirThreshold) {
XLOGF(
INFO,
"Nfs crawl detected, initiating process discovery and attribution: "
"[nfs.read_us.count.60 = {} > {} or nfs.readdir[plus]_us.count.60 = {} > {}.",
DBG2,
"NFS crawl detected, initiating process discovery and attribution: "
"[nfs.read_us.count.60 = {} > {} or nfs.readdir[plus]_us.count.60 = {} > {}]",
readCount,
readThreshold,
readDirCount,
readDirThreshold);
// Get list of excluded process names
auto exclusions = edenConfig->nfsCrawlExcludedProcessNames.getValue();
// Iterate over each mount
auto mountPoints = getMountPoints();
for (auto& mountPointHandle : mountPoints) {
folly::via(
getServerState()->getThreadPool().get(), [this, mountPointHandle]() {
auto& mountPoint = mountPointHandle.getEdenMount();
if (mountPoint.isNfsdChannel()) {
auto pids =
proc_util::readProcessIdsForPath(mountPoint.getPath());
XLOGF(
INFO,
"NFS crawl detection found {} processes opening files in mount point: {}",
pids.size(),
mountPoint.getPath());
if (mountPointHandle.getEdenMount().isNfsdChannel()) {
folly::via(
getServerState()->getThreadPool().get(),
[serverState = serverState_, mountPointHandle, exclusions]() {
const auto& mount = mountPointHandle.getEdenMount();
// Get list of pids that have open files/paths on the mount
auto pids = proc_util::readProcessIdsForPath(mount.getPath());
for (auto pid : pids) {
auto processNameHandle =
serverState_->getProcessNameCache()->lookup(pid);
XLOGF(
INFO,
"NFS detected process: {} ({})",
processNameHandle.get(),
pid);
auto simpleName = proc_util::readProcessSimpleName(pid);
if (simpleName.has_value()) {
if (exclusions.find(simpleName.value()) == exclusions.end()) {
// Log process hierarchy
auto hierarchy = proc_util::getProcessHierarchy(
serverState->getProcessNameCache(), pid);
XCHECK(
!hierarchy.empty(),
"proc_util::getProcessHierarchy returned an empty list.");
auto [_pid, sname, pname] = std::move(hierarchy.top());
hierarchy.pop();
std::string output = fmt::format(
"NFS crawl detection found process with open files in mount point: {}\n {} ({}): {}",
mount.getPath(),
sname,
_pid,
pname);
while (!hierarchy.empty()) {
fmt::format_to(std::back_inserter(output), "\n");
auto [_pid, sname, pname] = std::move(hierarchy.top());
hierarchy.pop();
fmt::format_to(
std::back_inserter(output),
" ->{}({}): {}",
sname,
_pid,
pname);
}
XLOGF(DBG2, output);
}
}
}
}
});
});
}
}
}
}

View File

@ -9,6 +9,7 @@
#include <array>
#include <fstream>
#include <optional>
#include <vector>
#include <folly/Conv.h>
@ -23,6 +24,7 @@
#include <mach/mach_init.h> // @manual
#include <mach/task.h> // @manual
#include <mach/task_info.h> // @manual
#include <sys/proc_info.h> // @manual
#endif
#ifdef _WIN32
@ -270,7 +272,7 @@ ProcessList readProcessIdsForPath(FOLLY_MAYBE_UNUSED const AbsolutePath& path) {
// If anything went wrong, resize buffer to 0 size.
if (pids_size < 0) {
XLOGF(
WARNING,
INFO,
"proc_listpidspath failed: {} ()",
folly::errnoStr(pids_size),
pids_size);
@ -284,4 +286,79 @@ ProcessList readProcessIdsForPath(FOLLY_MAYBE_UNUSED const AbsolutePath& path) {
return pids;
}
/**
 * Looks up the simple (short) name of the process identified by `pid`.
 *
 * Only implemented on macOS, where the name is obtained from proc_name(). On
 * every other platform the result is always std::nullopt.
 *
 * Returns std::nullopt when proc_name() reports a length larger than the
 * supplied buffer, or when it does not report a positive length.
 */
std::optional<ProcessSimpleName> readProcessSimpleName(
    FOLLY_MAYBE_UNUSED pid_t pid) {
  std::optional<ProcessSimpleName> simpleName;
#ifdef __APPLE__
  // Max length of process name returned from proc_name
  // https://opensource.apple.com/source/xnu/xnu-1228.0.2/bsd/sys/proc_info.h.auto.html
  const int32_t len = 2 * MAXCOMLEN + 1;
  std::vector<char> name(len);
  const auto ret = proc_name(pid, name.data(), len);
  if (ret > len) {
    // This should never happen.
    XLOGF(INFO, "proc_name return length greater than provided buffer.");
    return std::nullopt;
  }
  // Only a strictly positive length is a usable name. Guarding with `> 0`
  // (rather than `!= 0`) keeps a negative value from ever being converted to
  // an enormous unsigned size when building the string.
  if (ret > 0) {
    simpleName.emplace(name.data(), static_cast<size_t>(ret));
  } else {
    XLOGF(DBG2, "proc_name failed: {} ({})", folly::errnoStr(errno), errno);
  }
#endif
  return simpleName;
}
/**
 * Returns the parent pid of `pid`, or std::nullopt when it cannot be
 * determined. Only implemented on macOS (via proc_pidinfo); other platforms
 * always yield std::nullopt.
 */
std::optional<pid_t> getParentProcessId(FOLLY_MAYBE_UNUSED pid_t pid) {
#ifdef __APPLE__
  // Future improvements might include caching of parent pid lookups. However,
  // as pids are recycled over time we would need some way to invalidate the
  // cache when necessary.
  proc_bsdinfo bsdInfo;
  int32_t expectedSize = sizeof(bsdInfo);
  auto written = proc_pidinfo(
      pid,
      PROC_PIDTBSDINFO,
      true, // find zombies
      &bsdInfo,
      expectedSize);

  // A zero return means the lookup itself failed.
  if (written == 0) {
    XLOGF(DBG3, "proc_pidinfo failed: {} ({})", folly::errnoStr(errno), errno);
    return std::nullopt;
  }
  // Anything other than a full proc_bsdinfo record is not trusted.
  if (written != expectedSize) {
    XLOGF(WARN, "proc_pidinfo failed returned an invalid size");
    return std::nullopt;
  }
  if (bsdInfo.pbi_ppid <= 0) {
    XLOGF(WARN, "proc_pidinfo returned an invalid parent pid.");
    return std::nullopt;
  }
  return static_cast<pid_t>(bsdInfo.pbi_ppid);
#else
  return std::nullopt;
#endif
}
std::stack<std::tuple<pid_t, std::string, ProcessName>> getProcessHierarchy(
std::shared_ptr<ProcessNameCache> processNameCache,
pid_t pid) {
std::stack<std::tuple<pid_t, std::string, ProcessName>> hierarchy;
do {
auto simpleName = proc_util::readProcessSimpleName(pid);
hierarchy.emplace(
pid,
simpleName.value_or("<unknown>"),
processNameCache->lookup(pid).get());
pid = proc_util::getParentProcessId(pid).value_or(0);
// Exit when reaching the root process (not included).
} while (pid > 1);
return hierarchy;
}
} // namespace facebook::eden::proc_util

View File

@ -8,9 +8,12 @@
#pragma once
#include <folly/Range.h>
#include <optional>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>
#include "eden/common/utils/ProcessName.h"
#include "eden/common/utils/ProcessNameCache.h"
#include "eden/fs/utils/PathFuncs.h"
namespace facebook::eden {
@ -130,5 +133,31 @@ using ProcessList = std::vector<pid_t>;
*/
ProcessList readProcessIdsForPath(const AbsolutePath& path);
/**
* A simple process name: just the name of the process itself. This is in
* contrast to ProcessName, which stores the process command line minus the
* process path.
*/
using ProcessSimpleName = std::string;
/**
* Fetches the simple process name for the specified process ID. If the pid is
* invalid or an error occurs while fetching, returns std::nullopt.
*
* The lookup is only implemented on macOS; on all other platforms this always
* returns std::nullopt.
*/
std::optional<ProcessSimpleName> readProcessSimpleName(pid_t pid);
/**
* Get the parent process ID of the specified process ID, if one exists.
*
* Returns std::nullopt when the lookup fails or reports a non-positive parent
* pid. The lookup is only implemented on macOS; on all other platforms this
* always returns std::nullopt.
*/
std::optional<pid_t> getParentProcessId(pid_t pid);
/**
* Get the process hierarchy, as a stack of tuples of pid, simple name and
* ProcessName, of the specified process ID.
*
* The specified pid is pushed first and each successive parent is pushed on
* top of it, so the top of the stack is the furthest ancestor found. The walk
* stops when the next parent pid is 1 or lower or cannot be determined; that
* parent is not included. A simple name that cannot be read is stored as
* "<unknown>". The returned stack always contains at least the entry for the
* specified pid.
*/
std::stack<std::tuple<pid_t, std::string, ProcessName>> getProcessHierarchy(
std::shared_ptr<ProcessNameCache> processNameCache,
pid_t pid);
} // namespace proc_util
} // namespace facebook::eden