add eden glob command

Summary:
It's silly to use `eden prefetch --no-prefetch` to efficiently glob
for filenames. Introduce an `eden glob` command which resolves a glob
relative to the current working directory.

Reviewed By: genevievehelsel

Differential Revision: D25450358

fbshipit-source-id: 45d6dc870d21510e51d5662c75e80385886899fc
This commit is contained in:
Chad Austin 2021-02-23 19:56:18 -08:00 committed by Facebook GitHub Bot
parent 7a3ac07f7f
commit 68cf44a8d1
7 changed files with 320 additions and 107 deletions

View File

@ -27,7 +27,7 @@ from eden.fs.cli.telemetry import TelemetrySample
from eden.fs.cli.util import check_health_using_lockfile, wait_for_instance_healthy
from eden.thrift.legacy import EdenClient, EdenNotRunningError
from facebook.eden import EdenService
from facebook.eden.ttypes import GlobParams, MountInfo as ThriftMountInfo, MountState
from facebook.eden.ttypes import MountInfo as ThriftMountInfo, MountState
from fb303_core.ttypes import fb303_status
from . import (
@ -2068,6 +2068,7 @@ def create_parser() -> argparse.ArgumentParser:
stats_mod.StatsCmd,
trace_mod.TraceCmd,
redirect_mod.RedirectCmd,
prefetch_mod.GlobCmd,
prefetch_mod.PrefetchCmd,
prefetch_profile_mod.PrefetchProfileCmd,
]

View File

@ -5,29 +5,93 @@
import argparse
import os
import sys
from pathlib import Path
from typing import NamedTuple, List
from facebook.eden.ttypes import GlobParams
from .cmd_util import require_checkout
from .config import EdenCheckout, EdenInstance
from .subcmd import Subcmd
def add_common_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the options shared by the glob-based subcommands.

    Adds the optional ``--repo`` and ``--pattern-file`` flags and the
    positional, zero-or-more ``PATTERN`` list to ``parser``.
    """
    repo_help = "Specify path to repo root (default: root of cwd)"
    pattern_file_help = (
        "Specify path to a file that lists patterns/files to match, one per line"
    )
    pattern_help = "Filename patterns to match via fnmatch"

    parser.add_argument("--repo", help=repo_help)
    parser.add_argument("--pattern-file", help=pattern_file_help)
    parser.add_argument("PATTERN", nargs="*", help=pattern_help)
class CheckoutAndPatterns(NamedTuple):
    """Bundle returned by find_checkout_and_patterns() for glob-style commands."""

    # First value returned by require_checkout(); used to open the Thrift client.
    instance: EdenInstance
    # Second value returned by require_checkout(); its ``path`` is sent as the
    # mount point of the glob request.
    checkout: EdenCheckout
    # Third value returned by require_checkout(). Presumably the starting
    # directory relative to the checkout root (``Path(".")`` at the root) --
    # NOTE(review): confirm against require_checkout().
    rel_path: Path
    # Positional PATTERN arguments followed by any patterns read from
    # --pattern-file.
    patterns: List[str]
def find_checkout_and_patterns(
    args: argparse.Namespace,
) -> CheckoutAndPatterns:
    """Resolve the checkout and collect the glob patterns named by ``args``.

    ``args`` must carry the attributes registered by add_common_arguments():
    ``repo``, ``pattern_file`` and ``PATTERN``.

    Patterns are the positional ``PATTERN`` values followed by the stripped,
    non-empty lines of ``--pattern-file`` (when given).

    Raises:
        SystemExit: with status 1 (after printing a message to stderr) when
            ``--repo`` was given but does not point at the root of a checkout.
        OSError: if the pattern file cannot be opened or read.
    """
    instance, checkout, rel_path = require_checkout(args, args.repo)
    if args.repo and rel_path != Path("."):
        print(f"{args.repo} is not the root of an eden repo", file=sys.stderr)
        raise SystemExit(1)

    # Copy so the argparse namespace is never mutated.
    patterns = list(args.PATTERN)
    if args.pattern_file is not None:
        with open(args.pattern_file) as f:
            # Iterate the file lazily and drop blank/whitespace-only lines:
            # they are separators in a "one per line" file, not (empty) globs.
            patterns.extend(pat for pat in map(str.strip, f) if pat)

    return CheckoutAndPatterns(
        instance=instance,
        checkout=checkout,
        rel_path=rel_path,
        patterns=patterns,
    )
class GlobCmd(Subcmd):
    """``eden glob``: print the names of files matching the given patterns.

    Patterns are resolved relative to the directory the command is run from
    (or the checkout root when ``--repo`` is given). Nothing is prefetched.
    """

    NAME = "glob"
    HELP = "Print matching filenames"

    def setup_parser(self, parser: argparse.ArgumentParser) -> None:
        """Register the shared --repo / --pattern-file / PATTERN arguments."""
        add_common_arguments(parser)

    def run(self, args: argparse.Namespace) -> int:
        """Evaluate the globs via the Thrift ``globFiles`` call and print matches.

        Returns 0 on success; errors from the Thrift call propagate.
        """
        checkout_and_patterns = find_checkout_and_patterns(args)

        # At the checkout root the relative path is Path("."). Send an empty
        # search root in that case: it is the Thrift default and means "the
        # repository root", whereas "." is not a valid EdenFS path component
        # (NOTE(review): confirm against PathFuncs' component checks).
        rel_path = checkout_and_patterns.rel_path
        search_root = b"" if rel_path == Path(".") else os.fsencode(rel_path)

        with checkout_and_patterns.instance.get_thrift_client_legacy() as client:
            result = client.globFiles(
                GlobParams(
                    mountPoint=bytes(checkout_and_patterns.checkout.path),
                    globs=checkout_and_patterns.patterns,
                    includeDotfiles=False,
                    prefetchFiles=False,
                    suppressFileList=False,
                    prefetchMetadata=False,
                    searchRoot=search_root,
                )
            )
            # Matches come back as bytes; decode with the filesystem encoding.
            for name in result.matchingFiles:
                print(os.fsdecode(name))
        return 0
class PrefetchCmd(Subcmd):
NAME = "prefetch"
HELP = "Prefetch content for matching file patterns"
def setup_parser(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--repo", help="Specify path to repo root (default: root of cwd)"
)
parser.add_argument(
"--pattern-file",
help=(
"Specify path to a file that lists patterns/files "
"to match, one per line"
),
)
add_common_arguments(parser)
parser.add_argument(
"--silent",
help="Do not print the names of the matching files",
@ -40,25 +104,15 @@ class PrefetchCmd(Subcmd):
default=False,
action="store_true",
)
parser.add_argument(
"PATTERN", nargs="*", help="Filename patterns to match via fnmatch"
)
def run(self, args: argparse.Namespace) -> int:
instance, checkout, rel_path = require_checkout(args, args.repo)
if args.repo and rel_path != Path("."):
print(f"{args.repo} is not the root of an eden repo")
return 1
checkout_and_patterns = find_checkout_and_patterns(args)
if args.pattern_file is not None:
with open(args.pattern_file) as f:
args.PATTERN += [pat.strip() for pat in f.readlines()]
with instance.get_thrift_client_legacy() as client:
with checkout_and_patterns.instance.get_thrift_client_legacy() as client:
result = client.globFiles(
GlobParams(
mountPoint=bytes(checkout.path),
globs=args.PATTERN,
mountPoint=bytes(checkout_and_patterns.checkout.path),
globs=checkout_and_patterns.patterns,
includeDotfiles=False,
prefetchFiles=not args.no_prefetch,
suppressFileList=args.silent,

View File

@ -59,6 +59,7 @@
#include "eden/fs/store/LocalStore.h"
#include "eden/fs/store/ObjectFetchContext.h"
#include "eden/fs/store/ObjectStore.h"
#include "eden/fs/store/PathLoader.h"
#include "eden/fs/store/hg/HgQueuedBackingStore.h"
#include "eden/fs/telemetry/Tracing.h"
#include "eden/fs/utils/Bug.h"
@ -1151,6 +1152,8 @@ folly::Future<std::unique_ptr<Glob>> EdenServiceHandler::future_globFiles(
// if none are specified. The results will be collected here.
std::vector<folly::Future<std::vector<GlobNode::GlobResult>>> globResults{};
RelativePath searchRoot{*params->searchRoot_ref()};
auto rootHashes = params->revisions_ref();
if (!rootHashes->empty()) {
// Note that we MUST reserve here, otherwise while emplacing we might
@ -1160,32 +1163,53 @@ folly::Future<std::unique_ptr<Glob>> EdenServiceHandler::future_globFiles(
for (auto& rootHash : *rootHashes) {
const Hash& originHash =
originHashes->emplace_back(hashFromThrift(rootHash));
globResults.emplace_back(edenMount->getObjectStore()
->getTreeForCommit(originHash, fetchContext)
.thenValue([edenMount,
globRoot,
&fetchContext,
fileBlobsToPrefetch,
&originHash](auto&& rootTree) {
return globRoot->evaluate(
edenMount->getObjectStore(),
fetchContext,
RelativePathPiece(),
rootTree,
fileBlobsToPrefetch,
originHash);
}));
globResults.emplace_back(
edenMount->getObjectStore()
->getTreeForCommit(originHash, fetchContext)
.thenValue([edenMount,
globRoot,
&fetchContext,
fileBlobsToPrefetch,
searchRoot](std::shared_ptr<const Tree>&& rootTree) {
return resolveTree(
*edenMount->getObjectStore(),
fetchContext,
std::move(rootTree),
searchRoot);
})
.thenValue([edenMount,
globRoot,
&fetchContext,
fileBlobsToPrefetch,
&originHash](std::shared_ptr<const Tree>&& tree) {
return globRoot->evaluate(
edenMount->getObjectStore(),
fetchContext,
RelativePathPiece(),
tree,
fileBlobsToPrefetch,
originHash);
}));
}
} else {
const Hash& originHash =
originHashes->emplace_back(edenMount->getParentCommits().parent1());
globResults.emplace_back(globRoot->evaluate(
edenMount->getObjectStore(),
fetchContext,
RelativePathPiece(),
edenMount->getRootInode(),
fileBlobsToPrefetch,
originHash));
globResults.emplace_back(
edenMount->getInode(searchRoot, helper->getFetchContext())
.thenValue([helper = helper.get(),
globRoot,
edenMount,
fileBlobsToPrefetch,
&originHash](InodePtr inode) {
return globRoot->evaluate(
edenMount->getObjectStore(),
helper->getFetchContext(),
RelativePathPiece(),
inode.asTreePtr(),
fileBlobsToPrefetch,
originHash);
}));
}
return wrapFuture(

View File

@ -689,6 +689,9 @@ struct GlobParams {
// in general we want to prefetch metadata, but some large globs can
// trigger too many metadata prefetches, so we allow skipping this.
8: bool prefetchMetadata = true;
// The directory from which the glob should be evaluated. Defaults to the
// repository root.
9: PathString searchRoot;
}
struct Glob {

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/fs/store/PathLoader.h"
#include <vector>
#include "eden/fs/model/Tree.h"
#include "eden/fs/service/gen-cpp2/eden_constants.h"
#include "eden/fs/store/ObjectStore.h"
#include "eden/fs/utils/EdenError.h"
namespace facebook::eden {
namespace {
// State shared across the recursive steps of resolveTree(): the path being
// resolved, stored as one PathComponent per path element. It is held through a
// shared_ptr so every asynchronous continuation can keep it alive.
struct ResolveTreeContext {
  std::vector<PathComponent> components;
};
// Recursive step of path resolution: `root` is the tree reached after
// consuming the first `index` components of `ctx->components`. Returns `root`
// itself once every component has been consumed; otherwise looks up the next
// component, fetches its tree from `objectStore`, and recurses.
//
// Throws an EdenError with ENOENT if the component is missing from `root`, or
// ENOTDIR if it names a non-tree entry. `objectStore` and `fetchContext` are
// captured by reference and must outlive the returned future.
folly::Future<std::shared_ptr<const Tree>> resolveTree(
    std::shared_ptr<ResolveTreeContext> ctx,
    ObjectStore& objectStore,
    ObjectFetchContext& fetchContext,
    std::shared_ptr<const Tree> root,
    size_t index) {
  if (ctx->components.size() == index) {
    // Nothing left to descend into.
    return std::move(root);
  }

  auto& name = ctx->components[index];
  auto* entry = root->getEntryPtr(name);
  if (entry == nullptr) {
    throw newEdenError(
        ENOENT, EdenErrorType::POSIX_ERROR, "no child with name ", name);
  }
  if (!entry->isTree()) {
    throw newEdenError(
        ENOTDIR, EdenErrorType::POSIX_ERROR, "child is not tree ", name);
  }

  // `name` is not used past this point, so moving `ctx` into the continuation
  // is safe.
  auto subtree = objectStore.getTree(entry->getHash(), fetchContext);
  return std::move(subtree).thenValue(
      [ctx = std::move(ctx), &objectStore, &fetchContext, index](
          std::shared_ptr<const Tree>&& tree) mutable {
        return resolveTree(
            ctx, objectStore, fetchContext, std::move(tree), index + 1);
      });
}
} // namespace
// Public entry point: resolves `path`, relative to `root`, to the Tree found
// at that location by walking one component at a time.
folly::Future<std::shared_ptr<const Tree>> resolveTree(
    ObjectStore& objectStore,
    ObjectFetchContext& fetchContext,
    std::shared_ptr<const Tree> root,
    RelativePathPiece path) {
  // Keep lifetimes simple rather than clever: copy each component into a
  // shared context that the asynchronous steps hold on to. This can be
  // optimized later if it ever shows up as a cost.
  auto context = std::make_shared<ResolveTreeContext>();
  auto& parts = context->components;
  for (auto component : path.components()) {
    parts.emplace_back(component);
  }

  return resolveTree(
      std::move(context), objectStore, fetchContext, std::move(root), 0);
}
} // namespace facebook::eden

View File

@ -0,0 +1,26 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#pragma once
#include <folly/futures/Future.h>
#include "eden/fs/store/ObjectFetchContext.h"
#include "eden/fs/utils/PathFuncs.h"
namespace facebook::eden {
class ObjectFetchContext;
class ObjectStore;
class Tree;
/**
 * Resolve `path`, interpreted relative to the tree `root`, to the Tree at that
 * location, loading intermediate trees from `objectStore` as needed.
 *
 * An empty `path` yields `root` itself. If a component is missing, or names an
 * entry that is not a tree, an EdenError with ENOENT or ENOTDIR respectively
 * is raised. A failure on the first component is thrown directly from this
 * call; failures on later components are thrown from within the future's
 * continuation.
 *
 * `objectStore` and `fetchContext` are captured by reference and must remain
 * valid until the returned future completes.
 */
folly::Future<std::shared_ptr<const Tree>> resolveTree(
    ObjectStore& objectStore,
    ObjectFetchContext& fetchContext,
    std::shared_ptr<const Tree> root,
    RelativePathPiece path);
} // namespace facebook::eden

View File

@ -38,6 +38,8 @@ class GlobTest(testcase.EdenRepoTest):
self.repo.write_file("java/com/example/foo/bar/Bar.java", "")
self.repo.write_file("java/com/example/foo/bar/baz/Baz.java", "")
self.repo.write_file("other/exclude.java", "")
self.commit1 = self.repo.commit("Commit 1.")
def setUp(self) -> None:
@ -53,64 +55,65 @@ class GlobTest(testcase.EdenRepoTest):
self.addCleanup(self.client.close)
def test_exact_path_component_match(self) -> None:
self.assert_glob(["hello"], ["hello"])
self.assert_glob(["ddir/subdir/.dotfile"], ["ddir/subdir/.dotfile"])
self.assert_glob(["hello"], [b"hello"])
self.assert_glob(["ddir/subdir/.dotfile"], [b"ddir/subdir/.dotfile"])
def test_wildcard_path_component_match(self) -> None:
self.assert_glob(["hel*"], ["hello"])
self.assert_glob(["ad*"], ["adir"])
self.assert_glob_with_dtypes(["ad*"], [("adir", "d")])
self.assert_glob(["a*/file"], ["adir/file"])
self.assert_glob_with_dtypes(["a*/file"], [("adir/file", "f")])
self.assert_glob(["hel*"], [b"hello"])
self.assert_glob(["ad*"], [b"adir"])
self.assert_glob_with_dtypes(["ad*"], [(b"adir", "d")])
self.assert_glob(["a*/file"], [b"adir/file"])
self.assert_glob_with_dtypes(["a*/file"], [(b"adir/file", "f")])
def test_no_accidental_substring_match(self) -> None:
self.assert_glob(["hell"], [], msg="No accidental substring match")
def test_match_all_files_in_directory(self) -> None:
self.assert_glob(["bdir/*"], ["bdir/file", "bdir/otherfile"])
self.assert_glob(["bdir/*"], [b"bdir/file", b"bdir/otherfile"])
def test_match_all_files_in_directory_with_dotfile(self) -> None:
self.assert_glob(["ddir/subdir/*"], ["ddir/subdir/notdotfile"])
self.assert_glob(["ddir/subdir/*"], [b"ddir/subdir/notdotfile"])
def test_overlapping_globs(self) -> None:
self.assert_glob(
["adir/*", "**/file"],
["adir/file", "bdir/file"],
[b"adir/file", b"bdir/file"],
msg="De-duplicate results from multiple globs",
)
def test_recursive_wildcard_prefix(self) -> None:
self.assert_glob(["**/file"], ["adir/file", "bdir/file"])
self.assert_glob(["**/file"], [b"adir/file", b"bdir/file"])
def test_recursive_wildcard_suffix(self) -> None:
self.assert_glob(["adir/**"], ["adir/file"])
self.assert_glob(["adir/**/*"], ["adir/file"])
self.assert_glob(["adir/**"], [b"adir/file"])
self.assert_glob(["adir/**/*"], [b"adir/file"])
def test_recursive_wildcard_suffix_with_dotfile(self) -> None:
self.assert_glob(
["ddir/**"], ["ddir/notdotfile", "ddir/subdir", "ddir/subdir/notdotfile"]
["ddir/**"], [b"ddir/notdotfile", b"ddir/subdir", b"ddir/subdir/notdotfile"]
)
self.assert_glob(
["ddir/**"],
[
"ddir/notdotfile",
"ddir/subdir",
"ddir/subdir/.dotfile",
"ddir/subdir/notdotfile",
b"ddir/notdotfile",
b"ddir/subdir",
b"ddir/subdir/.dotfile",
b"ddir/subdir/notdotfile",
],
include_dotfiles=True,
)
self.assert_glob(
["ddir/**/*"], ["ddir/notdotfile", "ddir/subdir", "ddir/subdir/notdotfile"]
["ddir/**/*"],
[b"ddir/notdotfile", b"ddir/subdir", b"ddir/subdir/notdotfile"],
)
self.assert_glob(
["ddir/**/*"],
[
"ddir/notdotfile",
"ddir/subdir",
"ddir/subdir/.dotfile",
"ddir/subdir/notdotfile",
b"ddir/notdotfile",
b"ddir/subdir",
b"ddir/subdir/.dotfile",
b"ddir/subdir/notdotfile",
],
include_dotfiles=True,
)
@ -119,14 +122,14 @@ class GlobTest(testcase.EdenRepoTest):
self.assert_glob(
["java/com/**/*.java"],
[
"java/com/example/Example.java",
"java/com/example/foo/Foo.java",
"java/com/example/foo/bar/Bar.java",
"java/com/example/foo/bar/baz/Baz.java",
b"java/com/example/Example.java",
b"java/com/example/foo/Foo.java",
b"java/com/example/foo/bar/Bar.java",
b"java/com/example/foo/bar/baz/Baz.java",
],
)
self.assert_glob(
["java/com/example/*/*.java"], ["java/com/example/foo/Foo.java"]
["java/com/example/*/*.java"], [b"java/com/example/foo/Foo.java"]
)
def test_malformed_query(self) -> None:
@ -154,37 +157,37 @@ class GlobTest(testcase.EdenRepoTest):
self.assertEqual(EdenErrorType.ARGUMENT_ERROR, ctx.exception.errorType)
def test_glob_on_non_current_commit(self) -> None:
self.assert_glob(["hello"], ["hello"], commits=[bytes.fromhex(self.commit0)])
self.assert_glob(["hola"], ["hola"], commits=[bytes.fromhex(self.commit0)])
self.assert_glob(["hello"], [b"hello"], commits=[bytes.fromhex(self.commit0)])
self.assert_glob(["hola"], [b"hola"], commits=[bytes.fromhex(self.commit0)])
def test_glob_multiple_commits(self) -> None:
self.assert_glob(
["hello"],
["hello", "hello"],
[b"hello", b"hello"],
commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)],
)
self.assert_glob(
["h*"],
["hello", "hello", "hola"],
[b"hello", b"hello", b"hola"],
commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)],
)
self.assert_glob(
["a*/*ile"],
["adir/file", "adir/phile"],
[b"adir/file", b"adir/phile"],
commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)],
)
def test_prefetch_matching_files(self) -> None:
self.assert_glob(["hello"], ["hello"], prefetching=True)
self.assert_glob(["hello"], [b"hello"], prefetching=True)
self.assert_glob(
["hello"],
["hello"],
[b"hello"],
prefetching=True,
commits=[bytes.fromhex(self.commit0)],
)
self.assert_glob(
["hello"],
["hello", "hello"],
[b"hello", b"hello"],
prefetching=True,
commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)],
)
@ -192,13 +195,13 @@ class GlobTest(testcase.EdenRepoTest):
def test_simple_matching_commit(self) -> None:
self.assert_glob(
["hello"],
expected_matches=["hello"],
expected_matches=[b"hello"],
expected_commits=[bytes.fromhex(self.commit1)],
)
self.assert_glob(
["hello"],
expected_matches=["hello"],
expected_matches=[b"hello"],
expected_commits=[bytes.fromhex(self.commit0)],
commits=[bytes.fromhex(self.commit0)],
)
@ -206,7 +209,7 @@ class GlobTest(testcase.EdenRepoTest):
def test_duplicate_file_multiple_commits(self) -> None:
self.assert_glob(
["hello"],
expected_matches=["hello", "hello"],
expected_matches=[b"hello", b"hello"],
expected_commits=[
bytes.fromhex(self.commit0),
bytes.fromhex(self.commit1),
@ -214,26 +217,58 @@ class GlobTest(testcase.EdenRepoTest):
commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)],
)
def test_multiple_file_multiple_commits(self) -> None:
    """Glob across two commits and check which commit each match came from.

    Defined once: a second, identical definition would silently shadow the
    first.
    """
    both_commits = [bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)]
    self.assert_glob(
        ["a*/*ile"],
        [b"adir/file", b"adir/phile"],
        expected_commits=[
            bytes.fromhex(self.commit1),
            bytes.fromhex(self.commit0),
        ],
        commits=both_commits,
    )
def test_search_root(self) -> None:
    """Globbing with a search root yields paths relative to that root."""
    java_sources = [
        b"example/Example.java",
        b"example/foo/Foo.java",
        b"example/foo/bar/Bar.java",
        b"example/foo/bar/baz/Baz.java",
    ]
    self.assert_glob(
        ["**/*.java"],
        expected_matches=java_sources,
        search_root=b"java/com",
    )
def test_search_root_with_specified_commits(self) -> None:
    """A search root also applies when globbing an explicitly named commit."""
    commit = bytes.fromhex(self.commit1)
    java_sources = [
        b"example/Example.java",
        b"example/foo/Foo.java",
        b"example/foo/bar/Bar.java",
        b"example/foo/bar/baz/Baz.java",
    ]
    self.assert_glob(
        ["**/*.java"],
        expected_matches=java_sources,
        # Every match is expected to be attributed to the one requested commit.
        expected_commits=[commit] * len(java_sources),
        commits=[commit],
        search_root=b"java/com",
    )
def assert_glob(
self,
globs: List[str],
expected_matches: List[str],
expected_matches: List[bytes],
include_dotfiles: bool = False,
msg: Optional[str] = None,
commits: Optional[List[bytes]] = None,
prefetching: bool = False,
expected_commits: Optional[List[bytes]] = None,
search_root: Optional[bytes] = None,
) -> None:
params = GlobParams(
mountPoint=self.mount_path_bytes,
@ -241,13 +276,10 @@ class GlobTest(testcase.EdenRepoTest):
includeDotfiles=include_dotfiles,
prefetchFiles=prefetching,
revisions=commits,
searchRoot=search_root,
)
result = self.client.globFiles(params)
path_results = (
path.decode("utf-8", errors="surrogateescape")
for path in result.matchingFiles
)
self.assertEqual(expected_matches, sorted(path_results), msg=msg)
self.assertEqual(expected_matches, sorted(result.matchingFiles), msg=msg)
self.assertFalse(result.dtypes)
if expected_commits:
@ -258,7 +290,7 @@ class GlobTest(testcase.EdenRepoTest):
def assert_glob_with_dtypes(
self,
globs: List[str],
expected_matches: List[Tuple[str, str]],
expected_matches: List[Tuple[bytes, str]],
include_dotfiles: bool = False,
msg: Optional[str] = None,
) -> None:
@ -270,10 +302,7 @@ class GlobTest(testcase.EdenRepoTest):
)
result = self.client.globFiles(params)
actual_results = zip(
(
path.decode("utf-8", errors="surrogateescape")
for path in result.matchingFiles
),
result.matchingFiles,
(_dtype_to_str(dtype) for dtype in result.dtypes),
)
self.assertEqual(expected_matches, sorted(actual_results), msg=msg)