From 68cf44a8d16676ebf98504ee55d5ac415b10173e Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Tue, 23 Feb 2021 19:56:18 -0800 Subject: [PATCH] add eden glob command Summary: It's silly to use `eden prefetch --no-prefetch` to efficiently glob for filenames. Introduce an `eden glob` command which resolves a glob relative to the current working directory. Reviewed By: genevievehelsel Differential Revision: D25450358 fbshipit-source-id: 45d6dc870d21510e51d5662c75e80385886899fc --- eden/fs/cli/main.py | 3 +- eden/fs/cli/prefetch.py | 102 +++++++++++++---- eden/fs/service/EdenServiceHandler.cpp | 68 +++++++---- eden/fs/service/eden.thrift | 3 + eden/fs/store/PathLoader.cpp | 76 +++++++++++++ eden/fs/store/PathLoader.h | 26 +++++ eden/integration/glob_test.py | 149 +++++++++++++++---------- 7 files changed, 320 insertions(+), 107 deletions(-) create mode 100644 eden/fs/store/PathLoader.cpp create mode 100644 eden/fs/store/PathLoader.h diff --git a/eden/fs/cli/main.py b/eden/fs/cli/main.py index e985656214..e8fb11284a 100644 --- a/eden/fs/cli/main.py +++ b/eden/fs/cli/main.py @@ -27,7 +27,7 @@ from eden.fs.cli.telemetry import TelemetrySample from eden.fs.cli.util import check_health_using_lockfile, wait_for_instance_healthy from eden.thrift.legacy import EdenClient, EdenNotRunningError from facebook.eden import EdenService -from facebook.eden.ttypes import GlobParams, MountInfo as ThriftMountInfo, MountState +from facebook.eden.ttypes import MountInfo as ThriftMountInfo, MountState from fb303_core.ttypes import fb303_status from . import ( @@ -2068,6 +2068,7 @@ def create_parser() -> argparse.ArgumentParser: stats_mod.StatsCmd, trace_mod.TraceCmd, redirect_mod.RedirectCmd, + prefetch_mod.GlobCmd, prefetch_mod.PrefetchCmd, prefetch_profile_mod.PrefetchProfileCmd, ] diff --git a/eden/fs/cli/prefetch.py b/eden/fs/cli/prefetch.py index cbbf5dd942..f932033c40 100644 --- a/eden/fs/cli/prefetch.py +++ b/eden/fs/cli/prefetch.py @@ -5,29 +5,93 @@ import argparse import os +import sys from pathlib import Path +from typing import NamedTuple, List from facebook.eden.ttypes import GlobParams from .cmd_util import require_checkout +from .config import EdenCheckout, EdenInstance from .subcmd import Subcmd +def add_common_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--repo", help="Specify path to repo root (default: root of cwd)" + ) + parser.add_argument( + "--pattern-file", + help=( + "Specify path to a file that lists patterns/files to match, one per line" + ), + ) + parser.add_argument( + "PATTERN", nargs="*", help="Filename patterns to match via fnmatch" + ) + + +class CheckoutAndPatterns(NamedTuple): + instance: EdenInstance + checkout: EdenCheckout + rel_path: Path + patterns: List[str] + + +def find_checkout_and_patterns( + args: argparse.Namespace, +) -> CheckoutAndPatterns: + instance, checkout, rel_path = require_checkout(args, args.repo) + if args.repo and rel_path != Path("."): + print(f"{args.repo} is not the root of an eden repo", file=sys.stderr) + raise SystemExit(1) + + patterns = list(args.PATTERN) + if args.pattern_file is not None: + with open(args.pattern_file) as f: + patterns.extend(pat.strip() for pat in f.readlines()) + + return CheckoutAndPatterns( + instance=instance, + checkout=checkout, + rel_path=rel_path, + patterns=patterns, + ) + + +class GlobCmd(Subcmd): + NAME = "glob" + HELP = "Print matching filenames" + + def setup_parser(self, parser: argparse.ArgumentParser) -> None: + add_common_arguments(parser) + + def run(self, args: argparse.Namespace) -> int: + checkout_and_patterns = find_checkout_and_patterns(args) + + with checkout_and_patterns.instance.get_thrift_client_legacy() as client: + result = client.globFiles( + GlobParams( + mountPoint=bytes(checkout_and_patterns.checkout.path), + globs=checkout_and_patterns.patterns, + includeDotfiles=False, + prefetchFiles=False, + suppressFileList=False, + prefetchMetadata=False, + searchRoot=os.fsencode(checkout_and_patterns.rel_path), + ) + ) + for name in result.matchingFiles: + print(os.fsdecode(name)) + return 0 + + class PrefetchCmd(Subcmd): NAME = "prefetch" HELP = "Prefetch content for matching file patterns" def setup_parser(self, parser: argparse.ArgumentParser) -> None: - parser.add_argument( - "--repo", help="Specify path to repo root (default: root of cwd)" - ) - parser.add_argument( - "--pattern-file", - help=( - "Specify path to a file that lists patterns/files " - "to match, one per line" - ), - ) + add_common_arguments(parser) parser.add_argument( "--silent", help="Do not print the names of the matching files", @@ -40,25 +104,15 @@ class PrefetchCmd(Subcmd): default=False, action="store_true", ) - parser.add_argument( - "PATTERN", nargs="*", help="Filename patterns to match via fnmatch" - ) def run(self, args: argparse.Namespace) -> int: - instance, checkout, rel_path = require_checkout(args, args.repo) - if args.repo and rel_path != Path("."): - print(f"{args.repo} is not the root of an eden repo") - return 1 + checkout_and_patterns = find_checkout_and_patterns(args) - if args.pattern_file is not None: - with open(args.pattern_file) as f: - args.PATTERN += [pat.strip() for pat in f.readlines()] - - with instance.get_thrift_client_legacy() as client: + with checkout_and_patterns.instance.get_thrift_client_legacy() as client: result = client.globFiles( GlobParams( - mountPoint=bytes(checkout.path), - globs=args.PATTERN, + mountPoint=bytes(checkout_and_patterns.checkout.path), + globs=checkout_and_patterns.patterns, includeDotfiles=False, prefetchFiles=not args.no_prefetch, suppressFileList=args.silent, diff --git a/eden/fs/service/EdenServiceHandler.cpp b/eden/fs/service/EdenServiceHandler.cpp index 3aebacacd4..35928d1d02 100644 --- a/eden/fs/service/EdenServiceHandler.cpp +++ b/eden/fs/service/EdenServiceHandler.cpp @@ -59,6 +59,7 @@ #include "eden/fs/store/LocalStore.h" #include "eden/fs/store/ObjectFetchContext.h" #include "eden/fs/store/ObjectStore.h" +#include "eden/fs/store/PathLoader.h" #include "eden/fs/store/hg/HgQueuedBackingStore.h" #include "eden/fs/telemetry/Tracing.h" #include "eden/fs/utils/Bug.h" @@ -1151,6 +1152,8 @@ folly::Future> EdenServiceHandler::future_globFiles( // if none are specified. The results will be collected here. std::vector>> globResults{}; + RelativePath searchRoot{*params->searchRoot_ref()}; + auto rootHashes = params->revisions_ref(); if (!rootHashes->empty()) { // Note that we MUST reserve here, otherwise while emplacing we might @@ -1160,32 +1163,53 @@ folly::Future> EdenServiceHandler::future_globFiles( for (auto& rootHash : *rootHashes) { const Hash& originHash = originHashes->emplace_back(hashFromThrift(rootHash)); - globResults.emplace_back(edenMount->getObjectStore() - ->getTreeForCommit(originHash, fetchContext) - .thenValue([edenMount, - globRoot, - &fetchContext, - fileBlobsToPrefetch, - &originHash](auto&& rootTree) { - return globRoot->evaluate( - edenMount->getObjectStore(), - fetchContext, - RelativePathPiece(), - rootTree, - fileBlobsToPrefetch, - originHash); - })); + + globResults.emplace_back( + edenMount->getObjectStore() + ->getTreeForCommit(originHash, fetchContext) + .thenValue([edenMount, + globRoot, + &fetchContext, + fileBlobsToPrefetch, + searchRoot](std::shared_ptr&& rootTree) { + return resolveTree( + *edenMount->getObjectStore(), + fetchContext, + std::move(rootTree), + searchRoot); + }) + .thenValue([edenMount, + globRoot, + &fetchContext, + fileBlobsToPrefetch, + &originHash](std::shared_ptr&& tree) { + return globRoot->evaluate( + edenMount->getObjectStore(), + fetchContext, + RelativePathPiece(), + tree, + fileBlobsToPrefetch, + originHash); + })); } } else { const Hash& originHash = originHashes->emplace_back(edenMount->getParentCommits().parent1()); - globResults.emplace_back(globRoot->evaluate( - edenMount->getObjectStore(), - fetchContext, - RelativePathPiece(), - edenMount->getRootInode(), - fileBlobsToPrefetch, - originHash)); + globResults.emplace_back( + edenMount->getInode(searchRoot, helper->getFetchContext()) + .thenValue([helper = helper.get(), + globRoot, + edenMount, + fileBlobsToPrefetch, + &originHash](InodePtr inode) { + return globRoot->evaluate( + edenMount->getObjectStore(), + helper->getFetchContext(), + RelativePathPiece(), + inode.asTreePtr(), + fileBlobsToPrefetch, + originHash); + })); } return wrapFuture( diff --git a/eden/fs/service/eden.thrift b/eden/fs/service/eden.thrift index 1c12fa6950..0785379841 100644 --- a/eden/fs/service/eden.thrift +++ b/eden/fs/service/eden.thrift @@ -689,6 +689,9 @@ struct GlobParams { // in general we want to prefetch metadata, but some large globs can // trigger too many metadata prefetches, so we allow skipping this. 8: bool prefetchMetadata = true; + // The directory from which the glob should be evaluated. Defaults to the + // repository root. + 9: PathString searchRoot; } struct Glob { diff --git a/eden/fs/store/PathLoader.cpp b/eden/fs/store/PathLoader.cpp new file mode 100644 index 0000000000..ac6b44ed17 --- /dev/null +++ b/eden/fs/store/PathLoader.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +#include "eden/fs/store/PathLoader.h" +#include +#include "eden/fs/model/Tree.h" +#include "eden/fs/service/gen-cpp2/eden_constants.h" +#include "eden/fs/store/ObjectStore.h" +#include "eden/fs/utils/EdenError.h" + +namespace facebook::eden { + +namespace { + +struct ResolveTreeContext { + std::vector components; +}; + +folly::Future> resolveTree( + std::shared_ptr ctx, + ObjectStore& objectStore, + ObjectFetchContext& fetchContext, + std::shared_ptr root, + size_t index) { + if (index == ctx->components.size()) { + return std::move(root); + } + + auto* child = root->getEntryPtr(ctx->components[index]); + if (!child) { + throw newEdenError( + ENOENT, + EdenErrorType::POSIX_ERROR, + "no child with name ", + ctx->components[index]); + } + + if (!child->isTree()) { + throw newEdenError( + ENOTDIR, + EdenErrorType::POSIX_ERROR, + "child is not tree ", + ctx->components[index]); + } + + return objectStore.getTree(child->getHash(), fetchContext) + .thenValue([ctx = std::move(ctx), &objectStore, &fetchContext, index]( + std::shared_ptr&& tree) mutable { + return resolveTree( + ctx, objectStore, fetchContext, std::move(tree), index + 1); + }); +} + +} // namespace + +folly::Future> resolveTree( + ObjectStore& objectStore, + ObjectFetchContext& fetchContext, + std::shared_ptr root, + RelativePathPiece path) { + // Don't do anything fancy with lifetimes and just get this correct as simply + // as possible. There's room for optimization if it matters. + auto ctx = std::make_shared(); + for (auto c : path.components()) { + ctx->components.emplace_back(c); + } + + return resolveTree( + std::move(ctx), objectStore, fetchContext, std::move(root), 0); +} + +} // namespace facebook::eden diff --git a/eden/fs/store/PathLoader.h b/eden/fs/store/PathLoader.h new file mode 100644 index 0000000000..0ff63faab0 --- /dev/null +++ b/eden/fs/store/PathLoader.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +#pragma once + +#include +#include "eden/fs/store/ObjectFetchContext.h" +#include "eden/fs/utils/PathFuncs.h" + +namespace facebook::eden { + +class ObjectFetchContext; +class ObjectStore; +class Tree; + +folly::Future> resolveTree( + ObjectStore& objectStore, + ObjectFetchContext& fetchContext, + std::shared_ptr root, + RelativePathPiece path); + +} // namespace facebook::eden diff --git a/eden/integration/glob_test.py b/eden/integration/glob_test.py index 1c8a373877..7ad0670d5b 100644 --- a/eden/integration/glob_test.py +++ b/eden/integration/glob_test.py @@ -38,6 +38,8 @@ class GlobTest(testcase.EdenRepoTest): self.repo.write_file("java/com/example/foo/bar/Bar.java", "") self.repo.write_file("java/com/example/foo/bar/baz/Baz.java", "") + self.repo.write_file("other/exclude.java", "") + self.commit1 = self.repo.commit("Commit 1.") def setUp(self) -> None: @@ -53,64 +55,65 @@ class GlobTest(testcase.EdenRepoTest): self.addCleanup(self.client.close) def test_exact_path_component_match(self) -> None: - self.assert_glob(["hello"], ["hello"]) - self.assert_glob(["ddir/subdir/.dotfile"], ["ddir/subdir/.dotfile"]) + self.assert_glob(["hello"], [b"hello"]) + self.assert_glob(["ddir/subdir/.dotfile"], [b"ddir/subdir/.dotfile"]) def test_wildcard_path_component_match(self) -> None: - self.assert_glob(["hel*"], ["hello"]) - self.assert_glob(["ad*"], ["adir"]) - self.assert_glob_with_dtypes(["ad*"], [("adir", "d")]) - self.assert_glob(["a*/file"], ["adir/file"]) - self.assert_glob_with_dtypes(["a*/file"], [("adir/file", "f")]) + self.assert_glob(["hel*"], [b"hello"]) + self.assert_glob(["ad*"], [b"adir"]) + self.assert_glob_with_dtypes(["ad*"], [(b"adir", "d")]) + self.assert_glob(["a*/file"], [b"adir/file"]) + self.assert_glob_with_dtypes(["a*/file"], [(b"adir/file", "f")]) def test_no_accidental_substring_match(self) -> None: self.assert_glob(["hell"], [], msg="No accidental substring match") def test_match_all_files_in_directory(self) -> None: - self.assert_glob(["bdir/*"], ["bdir/file", "bdir/otherfile"]) + self.assert_glob(["bdir/*"], [b"bdir/file", b"bdir/otherfile"]) def test_match_all_files_in_directory_with_dotfile(self) -> None: - self.assert_glob(["ddir/subdir/*"], ["ddir/subdir/notdotfile"]) + self.assert_glob(["ddir/subdir/*"], [b"ddir/subdir/notdotfile"]) def test_overlapping_globs(self) -> None: self.assert_glob( ["adir/*", "**/file"], - ["adir/file", "bdir/file"], + [b"adir/file", b"bdir/file"], msg="De-duplicate results from multiple globs", ) def test_recursive_wildcard_prefix(self) -> None: - self.assert_glob(["**/file"], ["adir/file", "bdir/file"]) + self.assert_glob(["**/file"], [b"adir/file", b"bdir/file"]) def test_recursive_wildcard_suffix(self) -> None: - self.assert_glob(["adir/**"], ["adir/file"]) - self.assert_glob(["adir/**/*"], ["adir/file"]) + self.assert_glob(["adir/**"], [b"adir/file"]) + self.assert_glob(["adir/**/*"], [b"adir/file"]) def test_recursive_wildcard_suffix_with_dotfile(self) -> None: self.assert_glob( - ["ddir/**"], ["ddir/notdotfile", "ddir/subdir", "ddir/subdir/notdotfile"] + ["ddir/**"], [b"ddir/notdotfile", b"ddir/subdir", b"ddir/subdir/notdotfile"] ) self.assert_glob( ["ddir/**"], [ - "ddir/notdotfile", - "ddir/subdir", - "ddir/subdir/.dotfile", - "ddir/subdir/notdotfile", + b"ddir/notdotfile", + b"ddir/subdir", + b"ddir/subdir/.dotfile", + b"ddir/subdir/notdotfile", ], include_dotfiles=True, ) self.assert_glob( - ["ddir/**/*"], ["ddir/notdotfile", "ddir/subdir", "ddir/subdir/notdotfile"] + ["ddir/**/*"], + [b"ddir/notdotfile", b"ddir/subdir", b"ddir/subdir/notdotfile"], ) self.assert_glob( ["ddir/**/*"], [ - "ddir/notdotfile", - "ddir/subdir", - "ddir/subdir/.dotfile", - "ddir/subdir/notdotfile", + b"ddir/notdotfile", + b"ddir/subdir", + b"ddir/subdir/.dotfile", + b"ddir/subdir/notdotfile", ], include_dotfiles=True, ) @@ -119,14 +122,14 @@ class GlobTest(testcase.EdenRepoTest): self.assert_glob( ["java/com/**/*.java"], [ - "java/com/example/Example.java", - "java/com/example/foo/Foo.java", - "java/com/example/foo/bar/Bar.java", - "java/com/example/foo/bar/baz/Baz.java", + b"java/com/example/Example.java", + b"java/com/example/foo/Foo.java", + b"java/com/example/foo/bar/Bar.java", + b"java/com/example/foo/bar/baz/Baz.java", ], ) self.assert_glob( - ["java/com/example/*/*.java"], ["java/com/example/foo/Foo.java"] + ["java/com/example/*/*.java"], [b"java/com/example/foo/Foo.java"] ) def test_malformed_query(self) -> None: @@ -154,37 +157,37 @@ class GlobTest(testcase.EdenRepoTest): self.assertEqual(EdenErrorType.ARGUMENT_ERROR, ctx.exception.errorType) def test_glob_on_non_current_commit(self) -> None: - self.assert_glob(["hello"], ["hello"], commits=[bytes.fromhex(self.commit0)]) - self.assert_glob(["hola"], ["hola"], commits=[bytes.fromhex(self.commit0)]) + self.assert_glob(["hello"], [b"hello"], commits=[bytes.fromhex(self.commit0)]) + self.assert_glob(["hola"], [b"hola"], commits=[bytes.fromhex(self.commit0)]) def test_glob_multiple_commits(self) -> None: self.assert_glob( ["hello"], - ["hello", "hello"], + [b"hello", b"hello"], commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)], ) self.assert_glob( ["h*"], - ["hello", "hello", "hola"], + [b"hello", b"hello", b"hola"], commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)], ) self.assert_glob( ["a*/*ile"], - ["adir/file", "adir/phile"], + [b"adir/file", b"adir/phile"], commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)], ) def test_prefetch_matching_files(self) -> None: - self.assert_glob(["hello"], ["hello"], prefetching=True) + self.assert_glob(["hello"], [b"hello"], prefetching=True) self.assert_glob( ["hello"], - ["hello"], + [b"hello"], prefetching=True, commits=[bytes.fromhex(self.commit0)], ) self.assert_glob( ["hello"], - ["hello", "hello"], + [b"hello", b"hello"], prefetching=True, commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)], ) @@ -192,13 +195,13 @@ class GlobTest(testcase.EdenRepoTest): def test_simple_matching_commit(self) -> None: self.assert_glob( ["hello"], - expected_matches=["hello"], + expected_matches=[b"hello"], expected_commits=[bytes.fromhex(self.commit1)], ) self.assert_glob( ["hello"], - expected_matches=["hello"], + expected_matches=[b"hello"], expected_commits=[bytes.fromhex(self.commit0)], commits=[bytes.fromhex(self.commit0)], ) @@ -206,7 +209,7 @@ class GlobTest(testcase.EdenRepoTest): def test_duplicate_file_multiple_commits(self) -> None: self.assert_glob( ["hello"], - expected_matches=["hello", "hello"], + expected_matches=[b"hello", b"hello"], expected_commits=[ bytes.fromhex(self.commit0), bytes.fromhex(self.commit1), @@ -214,26 +217,58 @@ class GlobTest(testcase.EdenRepoTest): commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)], ) - def test_multiple_file_multiple_commits(self) -> None: - self.assert_glob( - ["a*/*ile"], - [b"adir/file", b"adir/phile"], - expected_commits=[ - bytes.fromhex(self.commit1), - bytes.fromhex(self.commit0), - ], - commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)], - ) + def test_multiple_file_multiple_commits(self) -> None: + self.assert_glob( + ["a*/*ile"], + [b"adir/file", b"adir/phile"], + expected_commits=[ + bytes.fromhex(self.commit1), + bytes.fromhex(self.commit0), + ], + commits=[bytes.fromhex(self.commit0), bytes.fromhex(self.commit1)], + ) + + def test_search_root(self) -> None: + self.assert_glob( + ["**/*.java"], + expected_matches=[ + b"example/Example.java", + b"example/foo/Foo.java", + b"example/foo/bar/Bar.java", + b"example/foo/bar/baz/Baz.java", + ], + search_root=b"java/com", + ) + + def test_search_root_with_specified_commits(self) -> None: + self.assert_glob( + ["**/*.java"], + expected_matches=[ + b"example/Example.java", + b"example/foo/Foo.java", + b"example/foo/bar/Bar.java", + b"example/foo/bar/baz/Baz.java", + ], + expected_commits=[ + bytes.fromhex(self.commit1), + bytes.fromhex(self.commit1), + bytes.fromhex(self.commit1), + bytes.fromhex(self.commit1), + ], + commits=[bytes.fromhex(self.commit1)], + search_root=b"java/com", + ) def assert_glob( self, globs: List[str], - expected_matches: List[str], + expected_matches: List[bytes], include_dotfiles: bool = False, msg: Optional[str] = None, commits: Optional[List[bytes]] = None, prefetching: bool = False, expected_commits: Optional[List[bytes]] = None, + search_root: Optional[bytes] = None, ) -> None: params = GlobParams( mountPoint=self.mount_path_bytes, @@ -241,13 +276,10 @@ class GlobTest(testcase.EdenRepoTest): includeDotfiles=include_dotfiles, prefetchFiles=prefetching, revisions=commits, + searchRoot=search_root, ) result = self.client.globFiles(params) - path_results = ( - path.decode("utf-8", errors="surrogateescape") - for path in result.matchingFiles - ) - self.assertEqual(expected_matches, sorted(path_results), msg=msg) + self.assertEqual(expected_matches, sorted(result.matchingFiles), msg=msg) self.assertFalse(result.dtypes) if expected_commits: @@ -258,7 +290,7 @@ class GlobTest(testcase.EdenRepoTest): def assert_glob_with_dtypes( self, globs: List[str], - expected_matches: List[Tuple[str, str]], + expected_matches: List[Tuple[bytes, str]], include_dotfiles: bool = False, msg: Optional[str] = None, ) -> None: @@ -270,10 +302,7 @@ class GlobTest(testcase.EdenRepoTest): ) result = self.client.globFiles(params) actual_results = zip( - ( - path.decode("utf-8", errors="surrogateescape") - for path in result.matchingFiles - ), + result.matchingFiles, (_dtype_to_str(dtype) for dtype in result.dtypes), ) self.assertEqual(expected_matches, sorted(actual_results), msg=msg)