eden debug processfetch

Summary:
This diff adds a sub-command `processfetch` to `eden debug`, which lists eden processes sorted by their fetch counts.

New argument options will be added in following diffs.

Reviewed By: fanzeyi

Differential Revision: D22456147

fbshipit-source-id: 75f94ec0ad03d59af1a5bf31c37bdf56de70241b
This commit is contained in:
Ailin Zhang 2020-07-17 06:42:24 -07:00 committed by Facebook GitHub Bot
parent 8a244dc0de
commit 2deb2f28eb
4 changed files with 105 additions and 23 deletions

View File

@ -46,10 +46,10 @@ from fb303_core import BaseService
from thrift.protocol.TSimpleJSONProtocol import TSimpleJSONProtocolFactory
from thrift.util import Serializer
from . import cmd_util, stats_print, subcmd as subcmd_mod, ui as ui_mod
from . import cmd_util, stats_print, subcmd as subcmd_mod, tabulate, ui as ui_mod
from .config import EdenCheckout, EdenInstance
from .subcmd import Subcmd
from .util import split_inodes_by_operation_type
from .util import format_cmd, split_inodes_by_operation_type
MB = 1024 ** 2
@ -168,6 +168,86 @@ class TreeCmd(Subcmd):
return 0
class Process:
    """A process id paired with its display command line and a fetch count."""

    def __init__(self, pid, cmd):
        """Store *pid*, the ``format_cmd`` rendering of *cmd*, and a zero count."""
        self.fetch_count = 0
        self.pid = pid
        self.cmd = format_cmd(cmd)

    def set_fetchs(self, fetch_counts):
        """Overwrite the stored fetch count with *fetch_counts*."""
        self.fetch_count = fetch_counts
@debug_cmd("processfetch", "List processes and fetch counts")
class ProcessFetchCmd(Subcmd):
    """Print a PID / FETCH COUNT / CMD table sorted by descending fetch count."""

    def setup_parser(self, parser: argparse.ArgumentParser) -> None:
        """Register the ``-s/--short-cmdline`` and ``-a/--all-processes`` flags."""
        parser.add_argument(
            "-s",
            "--short-cmdline",
            action="store_true",
            default=False,
            help="Show commands without arguments, otherwise show the entire cmdlines",
        )
        parser.add_argument(
            "-a",
            "--all-processes",
            action="store_true",
            default=False,
            # Adjacent string literals are joined verbatim, so the space after
            # "all" must be written explicitly.
            help="Default option only lists recent processes. This option shows all "
            "processes from the beginning of this EdenFS. Old cmdlines might be unavailable",
        )

    def run(self, args: argparse.Namespace) -> int:
        """Query EdenFS for access counts and print processes that fetched data.

        Processes with a fetch count of zero are omitted. Returns 0.
        """
        processes: Dict[int, Process] = {}
        header = ["PID", "FETCH COUNT", "CMD"]
        rows = []

        eden = cmd_util.get_eden_instance(args)
        with eden.get_thrift_client() as client:
            # Request access data for the past 16 seconds. Per the original
            # author's note, everything is limited to that window except
            # fetchCountsByPid, which is counted since EdenFS started.
            counts = client.getAccessCounts(16)

            # NOTE(review): the loop nesting below is reconstructed from a
            # listing that lost its indentation. As written, a pid that shows
            # up under several mounts gets a fresh Process (count reset to 0)
            # and set_fetchs() overwrites rather than sums, so only one
            # mount's count survives. Confirm whether counts should be
            # aggregated across mounts.
            for accesses in counts.accessesByMount.values():
                # Processes with recent accesses are always candidates.
                for pid in accesses.accessCountsByPid:
                    cmd = counts.cmdsByPid.get(pid, b"<unknown>")
                    processes[pid] = Process(pid, cmd)

                # When querying older versions of EdenFS fetchCountsByPid will be None
                fetch_counts_by_pid = accesses.fetchCountsByPid or {}

                for pid, fetch_counts in fetch_counts_by_pid.items():
                    if pid not in processes:
                        # Not a recent process: only include it on request.
                        if not args.all_processes:
                            continue
                        cmd = counts.cmdsByPid.get(pid, b"<unknown>")
                        processes[pid] = Process(pid, cmd)

                    processes[pid].set_fetchs(fetch_counts)

        sorted_processes = sorted(
            processes.values(), key=lambda proc: proc.fetch_count, reverse=True
        )

        for process in sorted_processes:
            if not process.fetch_count:
                continue
            cmd = process.cmd
            if args.short_cmdline:
                # Keep only the first whitespace-separated token of the cmdline.
                cmd = cmd.split()[0]
            # Numeric fields are stringified so the row really is the
            # Dict[str, str] it is declared to be.
            row: Dict[str, str] = {
                "PID": str(process.pid),
                "FETCH COUNT": str(process.fetch_count),
                "CMD": cmd,
            }
            rows.append(row)

        print(tabulate.tabulate(header, rows))
        return 0
@debug_cmd("blob", "Show eden's data for a source control blob")
class BlobCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:

View File

@ -6,7 +6,8 @@
import unittest
from ..top import Process, format_cmd, format_duration, format_mount
from ..top import Process, format_duration, format_mount
from ..util import format_cmd
class TopTest(unittest.TestCase):

View File

@ -9,8 +9,6 @@ import collections
import copy
import datetime
import os
import re
import shlex
import socket
import time
from enum import Enum
@ -20,6 +18,7 @@ from typing import Any, Dict, List, Optional, Tuple
from facebook.eden.ttypes import AccessCounts
from . import cmd_util
from .util import format_cmd
class State(Enum):
@ -117,24 +116,6 @@ def format_time(elapsed, modulos, suffixes):
return f"{elapsed}{last_suffix}"
def format_cmd(cmd: bytes) -> str:
    """Render a NUL-separated command line as one shell-quoted string.

    Args:
        cmd: The raw command line, with arguments separated by NUL
            characters and optionally terminated by one.

    Returns:
        The arguments joined by single spaces, each passed through
        ``shlex.quote``. An absolute path in the first argument is reduced
        to its basename.
    """
    cmdline = os.fsdecode(cmd)
    # Remove one trailing null, which would otherwise cause the command to
    # show up with an extra empty string on the end. A suffix check is used
    # instead of the regex "\x00$": "$" also matches just before a final
    # newline, which would strip a NUL that separates two real arguments.
    if cmdline.endswith("\x00"):
        cmdline = cmdline[:-1]
    program, *arguments = cmdline.split("\x00")

    # Focus on just the basename as the paths can be quite long
    if os.path.isabs(program):
        program = os.path.basename(program)

    # Show cmdline args too, if they exist
    return " ".join(shlex.quote(part) for part in [program, *arguments])
COLUMN_FORMATTING = Row(
top_pid=lambda x: x,
mount=lambda x: x,

View File

@ -10,6 +10,8 @@ import getpass
import json
import os
import random
import re
import shlex
import stat
import subprocess
import sys
@ -626,3 +628,21 @@ def resolve_path(path: Path, strict: bool = False) -> Path:
if sys.platform == "win32":
return Path(os.path.realpath(path))
return path.resolve(strict=strict)
def format_cmd(cmd: bytes) -> str:
    """Render a NUL-separated command line as one shell-quoted string.

    Args:
        cmd: The raw command line, with arguments separated by NUL
            characters and optionally terminated by one.

    Returns:
        The arguments joined by single spaces, each passed through
        ``shlex.quote``. An absolute path in the first argument is reduced
        to its basename.
    """
    cmdline = os.fsdecode(cmd)
    # Remove one trailing null, which would otherwise cause the command to
    # show up with an extra empty string on the end. A suffix check is used
    # instead of the regex "\x00$": "$" also matches just before a final
    # newline, which would strip a NUL that separates two real arguments.
    if cmdline.endswith("\x00"):
        cmdline = cmdline[:-1]
    program, *arguments = cmdline.split("\x00")

    # Focus on just the basename as the paths can be quite long
    if os.path.isabs(program):
        program = os.path.basename(program)

    # Show cmdline args too, if they exist
    return " ".join(shlex.quote(part) for part in [program, *arguments])