#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# pyre-unsafe
import argparse
import binascii
import collections
import json
import os
import re
import shlex
import stat
import subprocess
import sys
import time
from pathlib import Path
from typing import (
Any,
Callable,
cast,
DefaultDict,
Dict,
IO,
Iterator,
List,
Optional,
Pattern,
Tuple,
Type,
Union,
)
import eden.dirstate
import facebook.eden.ttypes as eden_ttypes
import thrift.util.inspect
from eden.fs.cli.cmd_util import get_eden_instance
from eden.thrift.legacy import EdenClient
from facebook.eden import EdenService
from facebook.eden.constants import (
DIS_COMPUTE_BLOB_SIZES,
DIS_NOT_RECURSIVE,
DIS_REQUIRE_LOADED,
DIS_REQUIRE_MATERIALIZED,
)
from facebook.eden.ttypes import (
BlobMetadataOrError,
BlobMetadataWithOrigin,
DataFetchOrigin,
DebugGetBlobMetadataRequest,
DebugGetRawJournalParams,
DebugGetScmBlobRequest,
DebugJournalDelta,
EdenError,
MountId,
NoValueForKeyError,
ScmBlobMetadata,
ScmBlobOrError,
ScmBlobWithOrigin,
SyncBehavior,
TimeSpec,
TreeInodeDebugInfo,
)
from fb303_core import BaseService
from thrift.protocol.TSimpleJSONProtocol import TSimpleJSONProtocolFactory
from thrift.Thrift import TApplicationException
from thrift.util import Serializer
try:
from tqdm import tqdm
except ModuleNotFoundError:
    def tqdm(x):
        return x
from . import (
cmd_util,
hg_util,
prefetch_profile as prefetch_profile_mod,
rage as rage_mod,
stats_print,
subcmd as subcmd_mod,
tabulate,
ui as ui_mod,
)
from .config import EdenCheckout, EdenInstance
from .subcmd import Subcmd
from .util import format_cmd, format_mount, print_stderr, split_inodes_by_operation_type
MB: int = 1024**2
debug_cmd = subcmd_mod.Decorator()
# This is backported from Python 3.9.
#
# TODO: Use argparse.BooleanOptionalAction when we
# can expect Python 3.9 or later.
class BooleanOptionalAction(argparse.Action):
def __init__(
self,
option_strings,
dest,
default=None,
type=None,
choices=None,
required=False,
help=None,
metavar=None,
):
_option_strings = []
for option_string in option_strings:
_option_strings.append(option_string)
if option_string.startswith("--"):
option_string = "--no-" + option_string[2:]
_option_strings.append(option_string)
super().__init__(
option_strings=_option_strings,
dest=dest,
nargs=0,
default=default,
type=type,
choices=choices,
required=required,
help=help,
metavar=metavar,
)
def __call__(self, parser, namespace, values, option_string=None):
if option_string in self.option_strings:
setattr(namespace, self.dest, not option_string.startswith("--no-"))
def format_usage(self):
return " | ".join(self.option_strings)
def escape_path(value: bytes) -> str:
"""
Take a binary path value, and return a printable string, with special
characters escaped.
"""
def human_readable_byte(b: int) -> str:
if b < 0x20 or b >= 0x7F:
return "\\x{:02x}".format(b)
elif b == ord(b"\\"):
return "\\\\"
return chr(b)
return "".join(human_readable_byte(b) for b in value)
def hash_str(value: bytes) -> str:
"""
Take a hash as a binary value, and return it represented as a hexadecimal
string.
"""
return binascii.hexlify(value).decode("utf-8")
def object_id_str(value: bytes) -> str:
# While we migrate the representation of object IDs, continue to support
# older versions of EdenFS returning 20-byte binary hashes.
if len(value) == 20:
return hash_str(value)
return value.decode("utf-8", errors="replace")
def parse_object_id(value: str) -> bytes:
return value.encode()
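# Example (illustrative): legacy 20-byte binary hashes render as 40 hex digits,
# while newer free-form object IDs are decoded as UTF-8 text:
#   object_id_str(b"\xab" * 20)    == "ab" * 20
#   object_id_str(b"my-object-id") == "my-object-id"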
@debug_cmd("parents", "Show EdenFS's current working copy parent")
class ParentsCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
parser.add_argument(
"--hg", action="store_true", help="Include Mercurial's parents in output"
)
def _commit_hex(self, commit: bytes) -> str:
return binascii.hexlify(commit).decode("utf-8")
def run(self, args: argparse.Namespace) -> int:
null_commit_id = 20 * b"\x00"
path = args.path or os.getcwd()
_, checkout, _ = cmd_util.require_checkout(args, path)
try:
working_copy_parent, checked_out_revision = checkout.get_snapshot()
except Exception as ex:
            print_stderr(f"error parsing EdenFS snapshot: {ex}")
return 1
if args.hg:
hg_parents, _, _ = _get_dirstate_data(checkout)
print("Mercurial p0: {}".format(self._commit_hex(hg_parents[0])))
if hg_parents[1] != null_commit_id:
print("Mercurial p1: {}".format(self._commit_hex(hg_parents[1])))
print("EdenFS snapshot: {}".format(working_copy_parent))
else:
print(working_copy_parent)
return 0
@debug_cmd("tree", "Show EdenFS's data for a source control tree")
class TreeCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"-L",
"--load",
action=BooleanOptionalAction,
default=True,
help="Load data from the backing store if necessary",
)
parser.add_argument("mount", help="The EdenFS mount point path.")
parser.add_argument("id", help="The tree ID")
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, args.mount)
tree_id = parse_object_id(args.id)
local_only = not args.load
with instance.get_thrift_client_legacy() as client:
entries = client.debugGetScmTree(
bytes(checkout.path), tree_id, localStoreOnly=local_only
)
max_object_id_len = max(
(len(object_id_str(entry.id)) for entry in entries), default=0
)
for entry in entries:
file_type_flags, perms = _parse_mode(entry.mode)
print(
"{} {:4o} {:<{}} {}".format(
file_type_flags,
perms,
object_id_str(entry.id),
max_object_id_len,
escape_path(entry.name),
)
)
return 0
class Process:
def __init__(self, pid, cmd, mount) -> None:
self.pid = pid
self.cmd = format_cmd(cmd)
self.fetch_count = 0
self.mount = format_mount(mount)
def set_fetchs(self, fetch_counts: int) -> None:
self.fetch_count = fetch_counts
@debug_cmd("processfetch", "List processes and fetch counts")
class ProcessFetchCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"-s",
"--short-cmdline",
action="store_true",
default=False,
            help="Show only the command name without arguments; otherwise show the entire command line",
)
parser.add_argument(
"-a",
"--all-processes",
action="store_true",
default=False,
            help="By default, only recent processes are listed. This option shows all "
            "processes since this EdenFS instance started. Old command lines might be unavailable",
)
parser.add_argument(
"-m",
"--mount",
action="store_true",
default=False,
help="Show mount base name for each process",
)
def run(self, args: argparse.Namespace) -> int:
        processes: Dict[int, Process] = {}
header = ["PID", "FETCH COUNT", "CMD"]
if args.mount:
header.insert(1, "MOUNT")
rows = []
eden = cmd_util.get_eden_instance(args)
with eden.get_thrift_client_legacy() as client:
            # Get the data from the past 16 seconds. All data is collected only
            # within this period, except fetchCountsByPid, which accumulates
            # from the time EdenFS started.
counts = client.getAccessCounts(16)
for mount, accesses in counts.accessesByMount.items():
# Get recent process accesses
for pid, _ in accesses.accessCountsByPid.items():
cmd = counts.cmdsByPid.get(pid, b"<unknown>")
processes[pid] = Process(pid, cmd, mount)
# When querying older versions of EdenFS fetchCountsByPid will be None
fetch_counts_by_pid = accesses.fetchCountsByPid or {}
# Set fetch counts for recent processes
for pid, fetch_counts in fetch_counts_by_pid.items():
if pid not in processes:
if not args.all_processes:
continue
else:
cmd = counts.cmdsByPid.get(pid, b"<unknown>")
processes[pid] = Process(pid, cmd, mount)
processes[pid].set_fetchs(fetch_counts)
sorted_processes = sorted(
processes.items(), key=lambda x: x[1].fetch_count, reverse=True
)
for (pid, process) in sorted_processes:
if process.fetch_count:
row: Dict[str, str] = {}
cmd = process.cmd
if args.short_cmdline:
cmd = cmd.split()[0]
                row["PID"] = str(pid)
                row["FETCH COUNT"] = str(process.fetch_count)
row["CMD"] = cmd
if args.mount:
row["MOUNT"] = process.mount
rows.append(row)
print(tabulate.tabulate(header, rows))
return 0
def add_get_object_options(parser: argparse.ArgumentParser, objectType: str) -> None:
group = parser.add_mutually_exclusive_group()
group.add_argument(
"-o",
"--object-cache-only",
action="store_true",
default=False,
help=f"Only check the in memory object cache for the {objectType}",
)
group.add_argument(
"-l",
"--local-store-only",
action="store_true",
default=False,
        help=f"Only check the EdenFS LocalStore for the {objectType}",
)
group.add_argument(
"-d", # d for "disk cache"
"--hgcache-only",
action="store_true",
default=False,
help=f"Only check the hgcache for the {objectType}",
)
group.add_argument(
"-r",
"--remote-only",
action="store_true",
default=False,
help=f"Only fetch the {objectType} from the servers. ",
)
group.add_argument(
"-a",
"--all",
action="store_true",
default=False,
help=f"Fetch the {objectType} from all storage locations and display their contents. ",
)
def get_origin_flags(args: argparse.Namespace) -> int:
origin_flags = DataFetchOrigin.ANYWHERE
if args.object_cache_only:
origin_flags = DataFetchOrigin.MEMORY_CACHE
elif args.local_store_only:
origin_flags = DataFetchOrigin.DISK_CACHE
elif args.hgcache_only:
origin_flags = DataFetchOrigin.LOCAL_BACKING_STORE
elif args.remote_only:
origin_flags = DataFetchOrigin.REMOTE_BACKING_STORE
elif args.all:
origin_flags = (
DataFetchOrigin.MEMORY_CACHE
| DataFetchOrigin.DISK_CACHE
| DataFetchOrigin.LOCAL_BACKING_STORE
| DataFetchOrigin.REMOTE_BACKING_STORE
| DataFetchOrigin.ANYWHERE
)
return origin_flags
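# DataFetchOrigin values are bit flags, which is why --all can OR them together
# into a single mask. Sketch (illustrative):
#   flags = DataFetchOrigin.MEMORY_CACHE | DataFetchOrigin.DISK_CACHE
#   # EdenFS returns one result per origin bit set in the request.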
def origin_to_text(origin: DataFetchOrigin) -> str:
if origin == DataFetchOrigin.MEMORY_CACHE:
return "object cache"
elif origin == DataFetchOrigin.DISK_CACHE:
return "local store"
elif origin == DataFetchOrigin.LOCAL_BACKING_STORE:
return "hgcache"
elif origin == DataFetchOrigin.REMOTE_BACKING_STORE:
return "servers"
elif origin == DataFetchOrigin.ANYWHERE:
return "EdenFS complete data fetching behavior"
return "<unknown>"
def print_blob(blob: bytes) -> None:
sys.stdout.buffer.write(blob)
def print_blob_metadata(id: str, metadata: ScmBlobMetadata) -> None:
print("Blob ID: {}".format(id))
print("Size: {}".format(metadata.size))
print("SHA-1: {}".format(hash_str(metadata.contentsSha1)))
def print_all_objects(
objects: List,
object_type: str,
is_error: Callable,
equal: Callable,
print_data: Callable,
) -> None:
non_error_objects = []
for obj in objects:
non_error_found = not is_error(obj)
pretty_origin = origin_to_text(obj.origin)
pretty_non_error_found = "hit" if non_error_found else "miss"
print(f"{pretty_origin}: {pretty_non_error_found}")
if non_error_found:
non_error_objects.append(obj)
if len(non_error_objects) == 0:
return
if len(non_error_objects) == 1:
print("\n")
print_data(non_error_objects[0])
return
objects_match = True
    for obj in non_error_objects[1:]:
if not equal((obj, non_error_objects[0])):
objects_match = False
break
if objects_match:
print(f"\nAll {object_type}s match :) \n")
print_data(non_error_objects[0])
else:
print(f"\n!!!!! {object_type} mismatch !!!!! \n")
for obj in non_error_objects:
            pretty_origin = origin_to_text(obj.origin)
            print(f"{object_type} from {pretty_origin}\n")
print("-----------------------------\n")
print_data(obj)
print("\n-----------------------------\n\n")
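# Sample output shape (illustrative) when fetching with --all: one hit/miss
# line per origin, followed by either the matching data printed once, or a
# per-origin dump when the copies disagree:
#   object cache: miss
#   local store: hit
#   hgcache: hit
#   servers: miss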
@debug_cmd(
"blob",
"Show EdenFS's data for a source control blob. Fetches from ObjectStore "
"by default: use options to inspect different origins.",
)
class BlobCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
add_get_object_options(parser, "blob")
parser.add_argument(
"mount",
help="The EdenFS mount point path.",
)
parser.add_argument("id", help="The blob ID")
def print_blob_or_error(self, blobOrError: ScmBlobOrError) -> None:
if blobOrError.getType() == ScmBlobOrError.BLOB:
print_blob(blobOrError.get_blob())
else:
error = blobOrError.get_error()
sys.stdout.buffer.write(f"ERROR fetching data: {error}\n".encode())
def print_all_blobs(self, blobs: List[ScmBlobWithOrigin]) -> None:
print_all_objects(
blobs,
"blob",
lambda blob: blob.blob.getType() != ScmBlobOrError.BLOB,
lambda blobs: blobs[0].blob.get_blob() == blobs[1].blob.get_blob(),
lambda blob: print_blob(blob.blob.get_blob()),
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, args.mount)
blob_id = parse_object_id(args.id)
origin_flags = get_origin_flags(args)
with instance.get_thrift_client_legacy() as client:
data = client.debugGetBlob(
DebugGetScmBlobRequest(
MountId(bytes(checkout.path)),
blob_id,
origin_flags,
)
)
if args.all:
self.print_all_blobs(data.blobs)
else:
self.print_blob_or_error(data.blobs[0].blob)
return 0
@debug_cmd("blobmeta", "Show EdenFS's metadata about a source control blob")
class BlobMetaCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
add_get_object_options(parser, "blob metadata")
parser.add_argument("mount", help="The EdenFS mount point path.")
parser.add_argument("id", help="The blob ID")
def print_blob_metadata_or_error(
self, id: str, metadataOrError: BlobMetadataOrError
) -> None:
if metadataOrError.getType() == BlobMetadataOrError.METADATA:
print_blob_metadata(id, metadataOrError.get_metadata())
else:
error = metadataOrError.get_error()
sys.stdout.buffer.write(f"ERROR fetching data: {error}\n".encode())
def print_all_blob_metadatas(
self, id: str, blob_metadatas: List[BlobMetadataWithOrigin]
) -> None:
print_all_objects(
blob_metadatas,
"blob metadata",
lambda metadata: metadata.metadata.getType()
!= BlobMetadataOrError.METADATA,
lambda metadatas: metadatas[0].metadata.get_metadata()
== metadatas[1].metadata.get_metadata(),
lambda metadata: print_blob_metadata(id, metadata.metadata.get_metadata()),
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, args.mount)
blob_id = parse_object_id(args.id)
origin_flags = get_origin_flags(args)
with instance.get_thrift_client_legacy() as client:
info = client.debugGetBlobMetadata(
DebugGetBlobMetadataRequest(
MountId(bytes(checkout.path)),
blob_id,
origin_flags,
)
)
if args.all:
self.print_all_blob_metadatas(args.id, info.metadatas)
else:
self.print_blob_metadata_or_error(args.id, info.metadatas[0].metadata)
return 0
class MismatchedBlobSize:
actual_blobsize: int
cached_blobsize: int
def __init__(self, actual_blobsize: int, cached_blobsize: int) -> None:
self.actual_blobsize = actual_blobsize
self.cached_blobsize = cached_blobsize
def check_blob_and_size_match(
client: EdenClient, checkout: Path, identifying_hash: bytes
) -> Optional[MismatchedBlobSize]:
try:
response = client.debugGetBlob(
DebugGetScmBlobRequest(
mountId=MountId(bytes(checkout)),
id=identifying_hash,
origins=DataFetchOrigin.LOCAL_BACKING_STORE, # We don't want to cause any network fetches.
)
)
blob = None
for blobFromACertainPlace in response.blobs:
try:
blob = blobFromACertainPlace.blob.get_blob()
except AssertionError:
# only care to check blobs that exist
pass
blobmeta = client.debugGetScmBlobMetadata(
mountPoint=bytes(checkout),
id=identifying_hash,
localStoreOnly=True, # We don't want to cause any network fetches.
)
if blob is not None and blobmeta.size != len(blob):
return MismatchedBlobSize(
actual_blobsize=len(blob), cached_blobsize=blobmeta.size
)
    except EdenError:
        # We don't care if debugGetBlob returns an EdenError because we only
        # care about data that has been read by the user, and is thus present
        # locally, being incorrect.
        # We don't care if debugGetScmBlobMetadata returns an EdenError because
        # we only care about cached data being incorrect.
        return None
    except TApplicationException as ex:
        # We don't care about older versions of EdenFS lacking this method; we
        # will just run the check when we can.
        if ex.type == TApplicationException.UNKNOWN_METHOD:
            return None
        raise
def check_size_corruption(
client: EdenClient,
instance: EdenInstance,
checkout: Path,
loaded_tree_inodes: List[TreeInodeDebugInfo],
) -> int:
# list of files whose size is wrongly cached in the local store
local_store_corruption: List[Tuple[bytes, MismatchedBlobSize]] = []
for loaded_dir in tqdm(loaded_tree_inodes):
for dirent in loaded_dir.entries:
if not stat.S_ISREG(dirent.mode) or dirent.materialized:
continue
result = check_blob_and_size_match(client, checkout, dirent.hash)
if result is not None:
local_store_corruption.append((dirent.name, result))
if local_store_corruption:
print(f"{len(local_store_corruption)} corrupted sizes in the local store")
for (filename, mismatch) in local_store_corruption[:10]:
print(
                f"{filename} -- "
f"actual size: {mismatch.actual_blobsize} -- "
f"local store blob size: {mismatch.cached_blobsize}"
)
if len(local_store_corruption) > 10:
print("...")
return 1
return 0
@debug_cmd(
    "sizecorruption", "Check if the cached blob metadata size matches the actual blob size"
)
class SizeCorruptionCmd(Subcmd):
def run(self, args: argparse.Namespace) -> int:
instance = get_eden_instance(args)
checkouts = instance.get_mounts()
        number_affected_mounts = 0
with instance.get_thrift_client_legacy() as client:
for path in sorted(checkouts.keys()):
print(f"Checking {path}")
inodes = client.debugInodeStatus(
bytes(path),
b"",
flags=DIS_REQUIRE_LOADED,
sync=SyncBehavior(),
)
                number_affected_mounts += check_size_corruption(
                    client, instance, path, inodes
                )
        return number_affected_mounts
_FILE_TYPE_FLAGS: Dict[int, str] = {
stat.S_IFREG: "f",
stat.S_IFDIR: "d",
stat.S_IFLNK: "l",
}
def _parse_mode(mode: int) -> Tuple[str, int]:
"""
Take a mode value, and return a tuple of (file_type, permissions)
where file type is a one-character flag indicating if this is a file,
directory, or symbolic link.
"""
file_type_str = _FILE_TYPE_FLAGS.get(stat.S_IFMT(mode), "?")
perms = mode & 0o7777
return file_type_str, perms
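# Example (illustrative): a regular file with rw-r--r-- permissions:
#   _parse_mode(stat.S_IFREG | 0o644) == ("f", 0o644)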
@debug_cmd("buildinfo", "Show the build info for the EdenFS server")
class BuildInfoCmd(Subcmd):
def run(self, args: argparse.Namespace) -> int:
instance = cmd_util.get_eden_instance(args)
do_buildinfo(instance)
return 0
def do_buildinfo(instance: EdenInstance, out: Optional[IO[bytes]] = None) -> None:
if out is None:
out = sys.stdout.buffer
build_info = instance.get_server_build_info()
sorted_build_info = collections.OrderedDict(sorted(build_info.items()))
for key, value in sorted_build_info.items():
out.write(b"%s: %s\n" % (key.encode(), value.encode()))
@debug_cmd(
    "gc_process_fetch", "Clear and start a new recording of process fetch counts"
)
class GcProcessFetchCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"mount",
nargs="?",
help="The path to an EdenFS mount point. If not specified,"
" process fetch data will be cleared for all mounts.",
)
def run(self, args: argparse.Namespace) -> int:
eden = cmd_util.get_eden_instance(args)
with eden.get_thrift_client_legacy() as client:
if args.mount:
instance, checkout, _rel_path = cmd_util.require_checkout(
args, args.mount
)
client.clearFetchCountsByMount(bytes(checkout.path))
else:
client.clearFetchCounts()
return 0
@debug_cmd("clear_local_caches", "Clears local caches of objects stored in RocksDB")
class ClearLocalCachesCmd(Subcmd):
def run(self, args: argparse.Namespace) -> int:
instance = cmd_util.get_eden_instance(args)
with instance.get_thrift_client_legacy() as client:
client.debugClearLocalStoreCaches()
return 0
@debug_cmd("compact_local_storage", "Asks RocksDB to compact its storage")
class CompactLocalStorageCmd(Subcmd):
def run(self, args: argparse.Namespace) -> int:
instance = cmd_util.get_eden_instance(args)
with instance.get_thrift_client_legacy() as client:
client.debugCompactLocalStorage()
return 0
@debug_cmd("hg_copy_map_get_all", "Copymap for dirstate")
class HgCopyMapGetAllCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
def run(self, args: argparse.Namespace) -> int:
path = args.path or os.getcwd()
_instance, checkout, _rel_path = cmd_util.require_checkout(args, path)
_parents, _dirstate_tuples, copymap = _get_dirstate_data(checkout)
_print_copymap(copymap)
return 0
def _print_copymap(copy_map: Dict[str, str]) -> None:
copies = [f"{item[1]} -> {item[0]}" for item in copy_map.items()]
copies.sort()
for copy in copies:
print(copy)
@debug_cmd("hg_dirstate", "Print full dirstate")
class HgDirstateCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
def run(self, args: argparse.Namespace) -> int:
path = args.path or os.getcwd()
_instance, checkout, _rel_path = cmd_util.require_checkout(args, path)
_parents, dirstate_tuples, copymap = _get_dirstate_data(checkout)
out = ui_mod.get_output()
entries = list(dirstate_tuples.items())
out.writeln(f"Non-normal Files ({len(entries)}):", attr=out.BOLD)
entries.sort(key=lambda entry: entry[0]) # Sort by key.
for path, dirstate_tuple in entries:
_print_hg_nonnormal_file(Path(os.fsdecode(path)), dirstate_tuple, out)
out.writeln(f"Copymap ({len(copymap)}):", attr=out.BOLD)
_print_copymap(copymap)
return 0
def _print_hg_nonnormal_file(
rel_path: Path, dirstate_tuple: Tuple[str, Any, int], out: ui_mod.Output
) -> None:
status = _dirstate_char_to_name(dirstate_tuple[0])
merge_state = _dirstate_merge_state_to_name(dirstate_tuple[2])
out.writeln(f"{rel_path}", fg=out.GREEN)
out.writeln(f" status = {status}")
out.writeln(f" mode = {oct(dirstate_tuple[1])}")
out.writeln(f" mergeState = {merge_state}")
def _dirstate_char_to_name(state: str) -> str:
if state == "n":
return "Normal"
elif state == "m":
return "NeedsMerging"
elif state == "r":
return "MarkedForRemoval"
elif state == "a":
return "MarkedForAddition"
elif state == "?":
return "NotTracked"
else:
raise Exception(f"Unrecognized dirstate char: {state}")
def _dirstate_merge_state_to_name(merge_state: int) -> str:
if merge_state == 0:
return "NotApplicable"
elif merge_state == -1:
return "BothParents"
elif merge_state == -2:
return "OtherParent"
else:
raise Exception(f"Unrecognized merge_state value: {merge_state}")
def _get_dirstate_data(
checkout: EdenCheckout,
) -> Tuple[Tuple[bytes, bytes], Dict[str, Tuple[str, Any, int]], Dict[str, str]]:
"""Returns a tuple of (parents, dirstate_tuples, copymap).
On error, returns None.
"""
filename = checkout.hg_dot_path.joinpath("dirstate")
with filename.open("rb") as f:
return eden.dirstate.read(f, str(filename))
@debug_cmd("inode", "Show data about loaded inodes")
class InodeCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
help="The path to the EdenFS mount point. If a subdirectory inside "
"a mount point is specified, only data about inodes under the "
"specified subdirectory will be reported.",
)
        parser.add_argument(
            "--recursive",
            action="store_true",
            default=False,
            help="Recursively walk the directory and report data on all of the subdirectories.",
        )
def run(self, args: argparse.Namespace) -> int:
out = sys.stdout.buffer
instance, checkout, rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
flags = DIS_REQUIRE_LOADED | DIS_COMPUTE_BLOB_SIZES
if not args.recursive:
flags |= DIS_NOT_RECURSIVE
results = client.debugInodeStatus(
bytes(checkout.path),
bytes(rel_path),
flags=flags,
sync=SyncBehavior(),
)
out.write(b"%d loaded TreeInodes\n" % len(results))
for inode_info in results:
_print_inode_info(inode_info, out)
return 0
@debug_cmd(
"modified",
"Enumerate all potentially-modified inode paths",
aliases=["materialized"],
)
class MaterializedCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
default=None,
nargs="?",
help="The path to the EdenFS mount point. If a subdirectory inside "
"a mount point is specified, only data about inodes under the "
"specified subdirectory will be reported.",
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
results = client.debugInodeStatus(
bytes(checkout.path),
bytes(rel_path),
DIS_REQUIRE_MATERIALIZED,
sync=SyncBehavior(),
)
if not results:
return 0
by_inode = {}
for result in results:
by_inode[result.inodeNumber] = result
def walk(ino, path):
print(os.fsdecode(path if path else b"/"))
try:
inode = by_inode[ino]
except KeyError:
return
for entry in inode.entries:
if entry.materialized:
walk(entry.inodeNumber, os.path.join(path, entry.name))
root = results[0]
# In practice, this condition is always true, because edenfs creates .eden at startup.
if root.materialized:
walk(root.inodeNumber, root.path)
return 0
@debug_cmd("file_stats", "Show data about loaded and written files")
class FileStatsCMD(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument("path", help="The path to the EdenFS mount point")
def make_file_entries(
self, paths_and_sizes: List[Tuple[str, int]]
) -> List[Dict[str, Union[str, int]]]:
return [
{"path": path, "size": file_size} for (path, file_size) in paths_and_sizes
]
def make_summary(self, paths_and_sizes: List[Tuple[str, int]]) -> Dict[str, Any]:
# large files larger than 10mb are processed differently by mercurial
large_paths_and_sizes = [
(path, size) for path, size in paths_and_sizes if size > 10 * MB
]
summary = {
"file_count": len(paths_and_sizes),
"total_bytes": sum(size for _, size in paths_and_sizes),
"large_file_count": len(large_paths_and_sizes),
"large_files": self.make_file_entries(large_paths_and_sizes),
"largest_directories_by_file_count": self.get_largest_directories_by_count(
paths_and_sizes
),
}
return summary
@staticmethod
def get_largest_directories_by_count(
paths_and_sizes: List[Tuple[str, int]], min_file_count: int = 1000
) -> List[Dict[str, Union[int, str]]]:
"""
Returns a list of directories that contain more than min_file_count
files.
"""
directories: DefaultDict[str, int] = collections.defaultdict(int)
directories["."] = 0
for filepath, _ in paths_and_sizes:
for parent in Path(filepath).parents:
directories[str(parent)] += 1
directory_list: List[Dict[str, Union[int, str]]] = sorted(
(
{"path": path, "file_count": file_count}
for path, file_count in directories.items()
if file_count >= min_file_count
),
key=lambda d: d["path"],
)
return directory_list
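    # Example (illustrative): with paths_and_sizes = [("a/b/c.txt", 1)] every
    # ancestor directory is counted once, so min_file_count=1 would yield
    # entries for ".", "a", and "a/b", each with file_count == 1.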
def run(self, args: argparse.Namespace) -> int:
request_root = args.path
instance, checkout, rel_path = cmd_util.require_checkout(args, request_root)
with instance.get_thrift_client_legacy() as client:
inode_results = client.debugInodeStatus(
bytes(checkout.path), bytes(rel_path), flags=0, sync=SyncBehavior()
)
read_files, written_files = split_inodes_by_operation_type(inode_results)
operations = {
"summary": {
"read": self.make_summary(read_files),
"written": self.make_summary(written_files),
},
"details": {
"read_files": self.make_file_entries(read_files),
"written_files": self.make_file_entries(written_files),
},
}
json.dump(operations, fp=sys.stdout, indent=4, separators=(",", ": "))
return 0
@debug_cmd("fuse_calls", "Show data about outstanding fuse calls")
class FuseCallsCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument("path", help="The path to the EdenFS mount point.")
def run(self, args: argparse.Namespace) -> int:
out = sys.stdout.buffer
instance, checkout, _rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
outstanding_call = client.debugOutstandingFuseCalls(bytes(checkout.path))
out.write(b"Outstanding FUSE calls: %d\n" % len(outstanding_call))
for count, call in enumerate(outstanding_call):
out.write(b"Call %d\n" % (count + 1))
out.write(b"\topcode: %d\n" % call.opcode)
out.write(b"\tunique: %d\n" % call.unique)
out.write(b"\tnodeid: %d\n" % call.nodeid)
out.write(b"\tuid: %d\n" % call.uid)
out.write(b"\tgid: %d\n" % call.gid)
out.write(b"\tpid: %d\n" % call.pid)
return 0
@debug_cmd(
"start_recording",
"Start an activity recording session and get its id",
)
class StartRecordingCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--output-dir",
help="The output dir to store the performance profile",
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, os.getcwd())
with instance.get_thrift_client_legacy() as client:
result = client.debugStartRecordingActivity(
bytes(checkout.path), args.output_dir.encode()
)
if result.unique:
sys.stdout.buffer.write(str(result.unique).encode())
return 0
            print(f"Failed to start recording at {args.output_dir}", file=sys.stderr)
return 1
@debug_cmd(
"stop_recording",
"Stop the given activity recording session and get the output file",
)
class StopRecordingCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--unique",
type=int,
help="The id of the recording to stop",
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, os.getcwd())
output_path: Optional[bytes] = None
with instance.get_thrift_client_legacy() as client:
result = client.debugStopRecordingActivity(
bytes(checkout.path), args.unique
)
output_path = result.path
if output_path is None:
            print(f"Failed to stop recording: {args.unique}", file=sys.stderr)
return 1
sys.stdout.buffer.write(output_path)
return 0
@debug_cmd(
"list_recordings",
"List active activity recording sessions",
)
class ListRecordingsCmd(Subcmd):
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, os.getcwd())
with instance.get_thrift_client_legacy() as client:
result = client.debugListActivityRecordings(bytes(checkout.path))
if not result.recordings:
                print("There are no active activity recording sessions.")
else:
for recording in result.recordings:
path = recording.path
print(
f"ID: {recording.unique} Output file: {'' if path is None else path.decode()}"
)
return 0
def _print_inode_info(inode_info: TreeInodeDebugInfo, out: IO[bytes]) -> None:
out.write(inode_info.path + b"\n")
out.write(b" Inode number: %d\n" % inode_info.inodeNumber)
out.write(b" Ref count: %d\n" % inode_info.refcount)
out.write(b" Materialized?: %s\n" % str(inode_info.materialized).encode())
out.write(b" Object ID: %s\n" % object_id_str(inode_info.treeHash).encode())
out.write(b" Entries (%d total):\n" % len(inode_info.entries))
max_object_id_len = max(
(len(object_id_str(entry.hash)) for entry in inode_info.entries), default=0
)
for entry in inode_info.entries:
if entry.loaded:
loaded_flag = "L"
else:
loaded_flag = "-"
file_type_str, perms = _parse_mode(entry.mode)
line = " {:9} {} {:4o} {} {:<{}} {}\n".format(
entry.inodeNumber,
file_type_str,
perms,
loaded_flag,
object_id_str(entry.hash),
max_object_id_len,
escape_path(entry.name),
)
out.write(line.encode())
@debug_cmd("getpath", "Get the EdenFS path that corresponds to an inode number")
class GetPathCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
parser.add_argument(
"number",
type=int,
help="Display information for the specified inode number.",
)
def run(self, args: argparse.Namespace) -> int:
path = args.path or os.getcwd()
instance, checkout, _rel_path = cmd_util.require_checkout(args, path)
with instance.get_thrift_client_legacy() as client:
inodePathInfo = client.debugGetInodePath(bytes(checkout.path), args.number)
state = "loaded" if inodePathInfo.loaded else "unloaded"
resolved_path = (
checkout.path.joinpath(os.fsdecode(inodePathInfo.path))
if inodePathInfo.linked
else "[unlinked]"
)
print(f"{state} {resolved_path}")
return 0
@debug_cmd("unload", "Unload unused inodes")
class UnloadInodesCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
help="The path to the EdenFS mount point. If a subdirectory inside "
"a mount point is specified, only inodes under the "
"specified subdirectory will be unloaded.",
)
parser.add_argument(
"age",
type=float,
nargs="?",
default=0,
help="Minimum age of the inodes to be unloaded in seconds",
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
            # Convert the age from a float number of seconds into a TimeSpec
            # with separate seconds and nanoseconds fields.
age = TimeSpec()
age.seconds = int(args.age)
age.nanoSeconds = int((args.age - age.seconds) * 10**9)
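            # e.g. (illustrative): args.age == 1.5 yields seconds=1,
            # nanoSeconds=500000000.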
count = client.unloadInodeForPath(
bytes(checkout.path), bytes(rel_path), age
)
unload_path = checkout.path.joinpath(rel_path)
print(f"Unloaded {count} inodes under {unload_path}")
return 0
@debug_cmd("flush_cache", "Flush kernel cache for inode")
class FlushCacheCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path", help="Path to a directory/file inside an EdenFS mount."
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
client.invalidateKernelInodeCache(bytes(checkout.path), bytes(rel_path))
return 0
@debug_cmd("log", "Display/Gather the EdenFS log file. Defaults to Display mode.")
class LogCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
group = parser.add_mutually_exclusive_group()
group.add_argument(
"--upload",
action="store_true",
            help=(
                "Gather logs from eden and upload them externally. "
                "This uses the upload tool specified by the rage.reporter config value"
            ),
)
group.add_argument(
"--stdout",
action="store_true",
help="Print the logs to stdout: ignore reporter.",
)
parser.add_argument(
"--full",
action="store_true",
help="Gather the full logs from eden. Works with the upload and stdout options",
)
parser.add_argument(
"--size",
type=int,
default=1000000,
help=(
"The amount of the logs we should gather in bytes. "
"Size is ignored if --full is set. Defaults to 1M. Works with --upload and --stdout"
),
)
parser.add_argument(
"--path",
action="store_true",
help="Print the location of the EdenFS log file",
)
def upload_logs(
self, args: argparse.Namespace, instance: EdenInstance, eden_log_path: Path
) -> int:
# For ease of use, just use the same rage reporter
rage_processor = instance.get_config_value("rage.reporter", default="")
proc: Optional[subprocess.Popen] = None
if rage_processor and not args.stdout:
proc = subprocess.Popen(shlex.split(rage_processor), stdin=subprocess.PIPE)
sink = proc.stdin
else:
proc = None
sink = sys.stdout.buffer
# pyre-fixme[6]: Expected `IO[bytes]` for 2nd param but got
# `Optional[typing.IO[typing.Any]]`.
rage_mod.print_log_file(eden_log_path, sink, args.full, args.size)
if proc:
# pyre-fixme[16]: `Optional` has no attribute `close`.
sink.close()
proc.wait()
return 0
def run(self, args: argparse.Namespace) -> int:
instance = cmd_util.get_eden_instance(args)
eden_log_path = instance.get_log_path()
if not eden_log_path.exists():
print(f"No log file found at {eden_log_path}", file=sys.stderr)
return 1
if args.path:
print(eden_log_path, file=sys.stdout)
return 0
if args.stdout or args.upload:
return self.upload_logs(args, instance, eden_log_path)
else:
# Display eden's log with the system pager if possible. We could
# add a --tail option.
pager_env = os.getenv("PAGER")
if pager_env:
pager_cmd = shlex.split(pager_env)
else:
pager_cmd = ["less", "+G"]
pager_cmd.append(str(eden_log_path))
os.execvp(pager_cmd[0], pager_cmd)
raise Exception("we should never reach here")
@debug_cmd("logging", "Display or modify logging configuration for the edenfs daemon")
class LoggingCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"-a",
"--all",
action="store_true",
help="Show the configuration of all logging categories, even ones "
"with default configuration settings",
)
parser.add_argument(
"--reset",
action="store_true",
help="Fully reset the logging config to the specified settings rather "
"than updating the current configuration with the new settings. "
"(Beware that you need to specify log handlers unless you want them "
"all to be deleted.)",
)
parser.add_argument(
"config",
type=str,
nargs="?",
help="A log configuration string to use to modify the log settings. See "
"folly/logging/docs/Config.md (https://git.io/fNZhr)"
" for syntax documentation. The most basic syntax is CATEGORY=LEVEL.",
)
def run(self, args: argparse.Namespace) -> int:
instance = cmd_util.get_eden_instance(args)
if args.reset and args.config is None:
# The configuration to use if the caller specifies --reset with no
# explicit config argument.
args.config = (
"WARN:default,eden=DBG2; default=stream:stream=stderr,async=true"
)
with instance.get_thrift_client_legacy() as client:
if args.config is not None:
if args.reset:
print(f"Resetting logging configuration to {args.config!r}")
client.setOption("logging_full", args.config)
else:
print(f"Updating logging configuration with {args.config!r}")
client.setOption("logging", args.config)
print("Updated configuration. New config settings:")
else:
print("Current logging configuration:")
if args.all:
config_str = client.getOption("logging_full")
else:
config_str = client.getOption("logging")
self.print_config(config_str)
return 0
def print_config(self, config_str: str) -> None:
config = json.loads(config_str)
handler_fmt = " {:12} {:12} {}"
separator = " " + ("-" * 76)
print("=== Log Handlers ===")
if not config["handlers"]:
print(" Warning: no log handlers configured!")
else:
print(handler_fmt.format("Name", "Type", "Options"))
print(separator)
for name, handler in sorted(config["handlers"].items()):
options_str = ", ".join(
sorted("{}={}".format(k, v) for k, v in handler["options"].items())
)
print(handler_fmt.format(name, handler["type"], options_str))
print("\n=== Log Categories ===")
category_fmt = " {:50} {:12} {}"
print(category_fmt.format("Name", "Level", "Handlers"))
print(separator)
for name, category in sorted(config["categories"].items()):
# For categories that do not inherit their parent's level (unusual)
# show the level with a trailing '!'
            # Don't do this for the root category, though--it never inherits its
            # parent's level since it has no parent.
level_str = category["level"]
if not category["inherit"] and name != "":
level_str = level_str + "!"
# Print the root category name as '.' instead of the empty string just
# to help make it clear that there is a category name here.
# (The logging config parsing code accepts '.' as the root category
# name too.)
if name == "":
name = "."
handlers_str = ", ".join(category["handlers"])
print(category_fmt.format(name, level_str, handlers_str))
@debug_cmd("journal_set_memory_limit", "Sets the journal memory limit")
class DebugJournalSetMemoryLimitCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"limit",
type=int,
help="The amount of memory (in bytes) that the journal can keep.",
)
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
try:
client.setJournalMemoryLimit(bytes(checkout.path), args.limit)
except EdenError as err:
print(err, file=sys.stderr)
return 1
return 0
@debug_cmd("journal_get_memory_limit", "Gets the journal memory limit")
class DebugJournalGetMemoryLimitCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
try:
mem = client.getJournalMemoryLimit(bytes(checkout.path))
except EdenError as err:
print(err, file=sys.stderr)
return 1
print("Journal memory limit is " + stats_print.format_size(mem))
return 0
@debug_cmd(
"flush_journal",
"Flushes the journal, and causes any subscribers to get a truncated result",
)
class DebugFlushJournalCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, _rel_path = cmd_util.require_checkout(args, args.path)
with instance.get_thrift_client_legacy() as client:
try:
client.flushJournal(bytes(checkout.path))
except EdenError as err:
print(err, file=sys.stderr)
return 1
return 0
@debug_cmd("journal", "Prints the most recent entries from the journal")
class DebugJournalCmd(Subcmd):
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"-n",
"--limit",
type=int,
default=1000,
help="The number of journal entries to print.",
)
parser.add_argument(
"-e",
"--pattern",
type=str,
help="Show only deltas for paths matching this pattern. "
"Specify '^((?!^\\.hg/).)*$' to exclude the .hg/ directory.",
)
parser.add_argument(
"-f",
"--follow",
action="store_true",
default=False,
help="Output appended data as the journal grows.",
)
parser.add_argument(
"-i",
"--ignore-case",
action="store_true",
default=False,
help="Ignore case in the pattern specified by --pattern.",
)
parser.add_argument(
"path",
nargs="?",
help="The path to an EdenFS mount point. Uses `pwd` by default.",
)
def run(self, args: argparse.Namespace) -> int:
pattern: Optional[Pattern[bytes]] = None
if args.pattern:
pattern_bytes = args.pattern.encode("utf-8")
flags = re.IGNORECASE if args.ignore_case else 0
pattern = re.compile(pattern_bytes, flags)
instance, checkout, _ = cmd_util.require_checkout(args, args.path)
mount = bytes(checkout.path)
def refresh(params):
with instance.get_thrift_client_legacy() as client:
journal = client.debugGetRawJournal(params)
deltas = journal.allDeltas
if len(deltas) == 0:
seq_num = params.fromSequenceNumber
else:
seq_num = deltas[0].fromPosition.sequenceNumber + 1
_print_raw_journal_deltas(reversed(deltas), pattern)
return seq_num
try:
params = DebugGetRawJournalParams(
mountPoint=mount, fromSequenceNumber=1, limit=args.limit
)
seq_num = refresh(params)
while args.follow:
REFRESH_SEC = 2
time.sleep(REFRESH_SEC)
params = DebugGetRawJournalParams(
mountPoint=mount, fromSequenceNumber=seq_num
)
seq_num = refresh(params)
except EdenError as err:
print(err, file=sys.stderr)
return 1
except KeyboardInterrupt:
if args.follow:
pass
else:
raise
return 0
def _print_raw_journal_deltas(
deltas: Iterator[DebugJournalDelta], pattern: Optional[Pattern[bytes]]
) -> None:
matcher: Callable[[bytes], bool] = (
(lambda x: True) if pattern is None else cast(Any, pattern.match)
)
labels = {
(False, False): "_",
(False, True): "A",
(True, False): "R",
(True, True): "M",
}
for delta in deltas:
entries: List[str] = []
for path, info in delta.changedPaths.items():
if not matcher(path):
continue
label = labels[(info.existedBefore, info.existedAfter)]
entries.append(f"{label} {os.fsdecode(path)}")
for path in delta.uncleanPaths:
entries.append(f"X {os.fsdecode(path)}")
# Only print journal entries if they changed paths that matched the matcher
# or if they change the current working directory commit.
if entries or delta.fromPosition.snapshotHash != delta.toPosition.snapshotHash:
_print_journal_entry(delta, entries)
def _print_journal_entry(delta: DebugJournalDelta, entries: List[str]) -> None:
if delta.fromPosition.snapshotHash != delta.toPosition.snapshotHash:
from_commit = hash_str(delta.fromPosition.snapshotHash)
to_commit = hash_str(delta.toPosition.snapshotHash)
commit_ids = f"{from_commit} -> {to_commit}"
else:
commit_ids = hash_str(delta.toPosition.snapshotHash)
if delta.fromPosition.sequenceNumber != delta.toPosition.sequenceNumber:
print(
f"MERGE {delta.fromPosition.sequenceNumber}-"
f"{delta.toPosition.sequenceNumber} {commit_ids}"
)
else:
print(f"DELTA {delta.fromPosition.sequenceNumber} {commit_ids}")
if entries:
entries.sort()
print(" " + "\n ".join(entries))
@debug_cmd("thrift", "Invoke a thrift function")
class DebugThriftCmd(Subcmd):
args_suffix = "_args"
result_suffix = "_result"
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"-l",
"--list",
action="store_true",
help="List the available thrift functions.",
)
parser.add_argument(
"--eval-all-args",
action="store_true",
help="Always pass all arguments through eval(), even for plain strings.",
)
parser.add_argument(
"--json", action="store_true", help="Attempt to encode the result as JSON."
)
parser.add_argument(
"function_name", nargs="?", help="The thrift function to call."
)
parser.add_argument(
"args", nargs="*", help="The arguments to the thrift function."
)
def run(self, args: argparse.Namespace) -> int:
if args.list:
self._list_functions()
return 0
if not args.function_name:
            print("Error: no function name specified", file=sys.stderr)
print(
"Use the --list argument to see a list of available functions, or "
"specify a function name",
file=sys.stderr,
)
return 1
# Look up the function information
try:
fn_info = thrift.util.inspect.get_function_info(
EdenService, args.function_name
)
except thrift.util.inspect.NoSuchFunctionError:
print(f"Error: unknown function {args.function_name!r}", file=sys.stderr)
print(
'Run "eden debug thrift --list" to see a list of available functions',
file=sys.stderr,
)
return 1
if len(args.args) != len(fn_info.arg_specs):
print(
f"Error: {args.function_name} requires {len(fn_info.arg_specs)} "
                f"arguments, but {len(args.args)} were supplied",
file=sys.stderr,
)
return 1
python_args = self._eval_args(
args.args, fn_info, eval_strings=args.eval_all_args
)
def lookup_module_member(modules, name):
for module in modules:
try:
return getattr(module, name)
except AttributeError:
continue
raise AttributeError(f"Failed to find {name} in {modules}")
instance = cmd_util.get_eden_instance(args)
with instance.get_thrift_client_legacy() as client:
fn = getattr(client, args.function_name)
result = fn(**python_args)
if args.json:
# The following back-and-forth is required to reliably
# convert a Python Thrift client result into its JSON
# form. The Python Thrift client returns native Python
# lists and dicts for lists and maps, but they cannot
# be passed directly to TSimpleJSONProtocol. Instead,
# map the result back into a Thrift message, and then
# serialize that as JSON. Finally, strip the message
# container.
#
# NOTE: Stripping the root object means the output may
# not have a root dict or array, which is required by
# most JSON specs. But Python's json module and jq are
# both fine with this deviation.
result_type = lookup_module_member(
[EdenService, BaseService], args.function_name + "_result"
)
json_data = Serializer.serialize(
TSimpleJSONProtocolFactory(), result_type(result)
)
json.dump(
# If the method returns void, json_data will not
# have a "success" field. Print `null` in that
# case.
json.loads(json_data).get("success"),
sys.stdout,
sort_keys=True,
indent=2,
)
sys.stdout.write("\n")
else:
print(result)
return 0
def _eval_args(
self, args: List[str], fn_info: thrift.util.inspect.Function, eval_strings: bool
) -> Dict[str, Any]:
from thrift.Thrift import TType
code_globals = {key: getattr(eden_ttypes, key) for key in dir(eden_ttypes)}
parsed_args = {}
for arg, arg_spec in zip(args, fn_info.arg_specs):
(
_field_id,
thrift_type,
arg_name,
_extra_spec,
_default,
_required,
) = arg_spec
# If the argument is a string type, don't pass it through eval.
# This is purely to make it easier for humans to input strings.
if not eval_strings and thrift_type == TType.STRING:
parsed_arg = arg
else:
# pyre-fixme[6]: For 5th argument expected `bool` but got `int`.
code = compile(arg, "<command_line>", "eval", 0, 1)
parsed_arg = eval(code, code_globals.copy())
parsed_args[arg_name] = parsed_arg
return parsed_args
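    # Example invocation (illustrative; assumes these thrift methods exist on
    # the running daemon): plain strings pass through unchanged, while other
    # argument types are eval'd with eden_ttypes names in scope:
    #   eden debug thrift getCounter "edenfs.foo"
    #   eden debug thrift unloadInodeForPath <mount> <path> "TimeSpec(seconds=0, nanoSeconds=0)"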
def _list_functions(self) -> None:
# Report functions by module, from parent service downwards
modules = thrift.util.inspect.get_service_module_hierarchy(EdenService)
for module in reversed(modules):
module_functions = thrift.util.inspect.list_service_functions(module)
print(f"From {module.__name__}:")
for _fn_name, fn_info in sorted(module_functions.items()):
print(f" {fn_info}")
@debug_cmd("drop-fetch-requests", "Drop all pending source control object fetches")
class DropRequestsCmd(Subcmd):
def run(self, args: argparse.Namespace) -> int:
instance = cmd_util.get_eden_instance(args)
with instance.get_thrift_client_legacy() as client:
num_dropped = client.debugDropAllPendingRequests()
print(f"Dropped {num_dropped} source control fetch requests")
return 0
@subcmd_mod.subcmd("debug", "Internal commands for examining EdenFS state")
# pyre-fixme[13]: Attribute `parser` is never initialized.
class DebugCmd(Subcmd):
parser: argparse.ArgumentParser
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
# The "debug_posix" module contains other debug subcommands that are
# supported on POSIX platforms (basically, not Windows). Import this module
# if we aren't running on Windows. This will make sure it has registered all of
# its subcommands in our debug_cmd.commands list.
if sys.platform != "win32":
from . import debug_posix # noqa: F401
else:
from . import debug_windows # noqa: F401
subcmd_add_list: List[Type[Subcmd]] = []
# Save the parser so we can use it to print help in run() if we are
# called with no arguments.
self.parser = parser
self.add_subcommands(parser, debug_cmd.commands + subcmd_add_list)
def run(self, args: argparse.Namespace) -> int:
self.parser.print_help()
return 0