bridge to mercurial for partial glob matching

Summary:
This version is working, see integration test in following diff

It can correctly do partial glob matching so that directories won't be filtered out prematurely

However, this version still needs improvement:

1. I can probably do similar thing to Michael's promise in HgSparseFilter
2. pass u8 to rust instead of a vec of string
3. check on windows

Reviewed By: kmancini

Differential Revision: D50427596

fbshipit-source-id: b95f1c6333978ee79ad1ba1f28cac9cd64441fc5
This commit is contained in:
Xinyi Wang 2024-01-19 09:15:08 -08:00 committed by Facebook GitHub Bot
parent 2d0c068f0d
commit 871e4a9760
14 changed files with 270 additions and 127 deletions

View File

@ -74,7 +74,7 @@
#include "eden/fs/store/PathLoader.h"
#include "eden/fs/store/ScmStatusDiffCallback.h"
#include "eden/fs/store/TreeCache.h"
#include "eden/fs/store/filter/WatchmanGlobFilter.h"
#include "eden/fs/store/filter/GlobFilter.h"
#include "eden/fs/store/hg/HgQueuedBackingStore.h"
#include "eden/fs/telemetry/SessionInfo.h"
#include "eden/fs/telemetry/TaskTrace.h"
@ -1674,16 +1674,16 @@ void sumUncommitedChanges(
const JournalDeltaRange& range,
const folly::Synchronized<ThriftStreamPublisherOwner<ChangedFileResult>>&
publisher,
const std::optional<WatchmanGlobFilter>& filter) {
std::optional<std::reference_wrapper<GlobFilter>> filter) {
for (auto& entry : range.changedFilesInOverlay) {
const auto& changeInfo = entry.second;
// the path is filtered don't consider it
if (filter.has_value()) {
if (filter) {
// TODO(T167750650): This .get() will block Thrift threads and could lead
// to Queue Timeouts. Instead of calling .get(), we should chain futures
// together.
if (filter.value()
if (filter->get()
.getFilterCoverageForPath(entry.first, folly::StringPiece(""))
.get() == FilterCoverage::RECURSIVELY_FILTERED) {
continue;
@ -1703,11 +1703,11 @@ void sumUncommitedChanges(
}
for (const auto& name : range.uncleanPaths) {
if (filter.has_value()) {
if (filter) {
// TODO(T167750650): This .get() will block Thrift threads and could lead
// to Queue Timeouts. Instead of calling .get(), we should chain futures
// together.
if (filter.value()
if (filter->get()
.getFilterCoverageForPath(name, folly::StringPiece(""))
.get() == FilterCoverage::RECURSIVELY_FILTERED) {
continue;
@ -1961,11 +1961,11 @@ EdenServiceHandler::streamSelectedChangesSince(
auto caseSensitivity =
mountHandle.getEdenMount().getCheckoutConfig()->getCaseSensitive();
std::unique_ptr<WatchmanGlobFilter> filter =
std::make_unique<WatchmanGlobFilter>(
params->get_filter().get_globs(), caseSensitivity);
auto filter =
std::make_unique<GlobFilter>(params->get_globs(), caseSensitivity);
sumUncommitedChanges(*summed, *sharedPublisherLock, *filter);
sumUncommitedChanges(
*summed, *sharedPublisherLock, std::reference_wrapper(*filter));
if (summed->snapshotTransitions.size() > 1) {
// create filtered backing store

View File

@ -396,7 +396,7 @@ cpp_library(
"//eden/fs/store:rocksdb",
"//eden/fs/store:sqlite",
"//eden/fs/store:store",
"//eden/fs/store/filter:filters",
"//eden/fs/store/filter:glob_filter",
"//eden/fs/store/hg:hg_backing_store",
"//eden/fs/store/hg:hg_queued_backing_store",
"//eden/fs/telemetry:hive_logger",

View File

@ -90,16 +90,12 @@ struct StreamChangesSinceParams {
2: eden.JournalPosition fromPosition;
}
struct GlobFilter {
1: list<string> globs; // a list of globs
}
/**
* Argument to streamSelectedChangesSince API
*/
struct StreamSelectedChangesSinceParams {
1: StreamChangesSinceParams changesParams;
2: GlobFilter filter;
2: list<string> globs;
}
struct TraceTaskEventsRequest {}

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/fs/store/filter/GlobFilter.h"
#include "eden/fs/utils/ImmediateFuture.h"
#include "eden/scm/lib/edenfs_ffi/src/lib.rs.h" // @manual
namespace facebook::eden {
ImmediateFuture<FilterCoverage> GlobFilter::getFilterCoverageForPath(
RelativePathPiece path,
folly::StringPiece) const {
return makeImmediateFutureWith([path = std::move(path), this] {
auto filterResult =
(*matcher_)->matches_directory(rust::Str{path.asString()});
switch (filterResult) {
case FilterDirectoryMatch::RecursivelyUnfiltered:
return FilterCoverage::RECURSIVELY_UNFILTERED;
case FilterDirectoryMatch::RecursivelyFiltered:
return FilterCoverage::RECURSIVELY_FILTERED;
case FilterDirectoryMatch::Unfiltered:
return FilterCoverage::UNFILTERED;
default:
throwf<std::invalid_argument>(
"Rust returned an invalid filter FilterDirectoryMatch result: {}",
static_cast<uint8_t>(filterResult));
}
});
}
} // namespace facebook::eden

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#pragma once
#include "eden/fs/store/filter/Filter.h"
#include "eden/fs/utils/CaseSensitivity.h"
#include "eden/fs/utils/EdenError.h"
#include "eden/scm/lib/edenfs_ffi/include/ffi.h"
#include "eden/scm/lib/edenfs_ffi/src/lib.rs.h" // @manual
namespace facebook::eden {
/**
* This class does filter based on glob patterns
*/
class GlobFilter : public Filter {
public:
explicit GlobFilter(
const std::vector<std::string>& globs,
const CaseSensitivity caseSensitive) {
rust::Vec<rust::String> rustGlobs;
for (const auto& glob : globs) {
rustGlobs.push_back(rust::String{glob});
}
// MatcherWrapper is the tunnel we use to communicate to rust to create
// TreeMatcher struct
// Note we use this method so that we don't need to expose complex struct
// TreeMatcher to cpp
std::shared_ptr<MatcherWrapper> wrapper =
std::make_shared<MatcherWrapper>();
create_tree_matcher(
rustGlobs, caseSensitive == CaseSensitivity::Sensitive, wrapper);
if (!wrapper->error_.empty()) {
// matcher creation failed, throw the error
throw newEdenError(
EdenErrorType::ARGUMENT_ERROR, wrapper->error_.c_str());
}
if (wrapper->matcher_ == nullptr) {
// neither matcher_ and error_ is set, this should never happen
throw newEdenError(
EdenErrorType::GENERIC_ERROR,
"Failed to create TreeMatcher, rust returned nullptr");
}
matcher_ = std::move(wrapper->matcher_);
} // namespace facebook::eden
/*
* Check whether a path is filtered by the given filter. NOTE: this method
* could potentially be slow. Returns a FilterCoverage enum that indicates the
* extent of the path's filtering.
*
* @param filterId, we use filterId as rootId here
*/
ImmediateFuture<FilterCoverage> getFilterCoverageForPath(
RelativePathPiece path,
folly::StringPiece filterId) const override;
private:
std::unique_ptr<rust::Box<facebook::eden::MercurialMatcher>> matcher_;
};
} // namespace facebook::eden

View File

@ -25,7 +25,7 @@ FilterCoverage determineFilterCoverage(
const rust::Box<facebook::eden::MercurialMatcher>& matcher,
std::string_view path) {
auto rustPath = rust::Str{path.data(), path.size()};
auto res = matcher->is_recursively_unfiltered(rustPath);
auto res = matcher->matches_directory(rustPath);
switch (res) {
case FilterDirectoryMatch::RecursivelyUnfiltered:
return FilterCoverage::RECURSIVELY_UNFILTERED;

View File

@ -19,23 +19,33 @@ cpp_library(
cpp_library(
name = "filters",
srcs = ["WatchmanGlobFilter.cpp"],
headers = [
"Filter.h",
"WatchmanGlobFilter.h",
],
deps = [
"//eden/fs/utils:glob",
],
exported_deps = [
"//eden/fs/utils:case_sensitivity",
"//eden/fs/utils:immediate_future",
"//eden/fs/utils:matcher",
"//eden/fs/utils:path",
"//folly:range",
],
)
cpp_library(
name = "glob_filter",
srcs = ["GlobFilter.cpp"],
headers = ["GlobFilter.h"],
deps = [
"//eden/fs/utils:immediate_future",
],
exported_deps = [
":filters",
"//eden/fs/utils:case_sensitivity",
"//eden/fs/utils:eden_error",
"//eden/scm/lib/edenfs_ffi:edenfs_ffi", # @manual
"//eden/scm/lib/edenfs_ffi:edenfs_ffi-wrapper", # @manual
"//eden/scm/lib/edenfs_ffi:edenfs_ffi@header", # @manual
],
)
cpp_library(
name = "hg_sparse_filter",
srcs = ["HgSparseFilter.cpp"],

View File

@ -1,25 +0,0 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/fs/store/filter/WatchmanGlobFilter.h"
#include "eden/fs/utils/GlobResult.h"
#include "eden/fs/utils/ImmediateFuture.h"
namespace facebook::eden {
ImmediateFuture<FilterCoverage> WatchmanGlobFilter::getFilterCoverageForPath(
RelativePathPiece path,
folly::StringPiece) const {
for (const auto& matcher : matcher_) {
if (matcher.match(path.view())) {
return FilterCoverage::UNFILTERED;
}
}
return FilterCoverage::RECURSIVELY_FILTERED;
}
} // namespace facebook::eden

View File

@ -1,55 +0,0 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#pragma once
#include "eden/fs/store/filter/Filter.h"
#include "eden/fs/utils/CaseSensitivity.h"
#include "eden/fs/utils/GlobMatcher.h"
namespace facebook::eden {
class ObjectStore;
/**
* This class does filter based on glob patterns
*/
class WatchmanGlobFilter : public Filter {
public:
explicit WatchmanGlobFilter(
const std::vector<std::string>& globs,
CaseSensitivity caseSensitive) {
GlobOptions options = GlobOptions::DEFAULT;
if (caseSensitive == CaseSensitivity::Insensitive) {
options |= GlobOptions::CASE_INSENSITIVE;
}
for (auto& globStr : globs) {
auto matcher = GlobMatcher::create(globStr, options);
if (matcher.hasError()) {
throw std::runtime_error(
fmt::format("Invalid glob pattern {}", globStr));
}
matcher_.emplace_back(std::move(*matcher));
}
}
/*
* Check whether a path is filtered by the given filter. NOTE: this method
* could potentially be slow. Returns a FilterCoverage enum that indicates the
* extent of the path's filtering.
*
* @param filterId, we use filterId as rootId here
*/
ImmediateFuture<FilterCoverage> getFilterCoverageForPath(
RelativePathPiece path,
folly::StringPiece filterId) const override;
private:
std::vector<GlobMatcher> matcher_;
};
} // namespace facebook::eden

View File

@ -15,7 +15,7 @@
#include "eden/fs/store/BackingStore.h"
#include "eden/fs/store/ObjectStore.h"
#include "eden/fs/store/TreeCache.h"
#include "eden/fs/store/filter/WatchmanGlobFilter.h"
#include "eden/fs/store/filter/GlobFilter.h"
#include "eden/fs/telemetry/EdenStats.h"
#include "eden/fs/telemetry/NullStructuredLogger.h"
#include "eden/fs/testharness/FakeBackingStore.h"
@ -23,12 +23,11 @@
using namespace facebook::eden;
using namespace std::literals::chrono_literals;
TEST(WatchmanGlobFilterTest, testGlobbingNotExists) {
TEST(GlobFilterTest, testGlobbingNotExists) {
std::vector<std::string> globs = {"tree1/**/*.cpp", "*.cpp", "*.rs"};
auto executor = folly::ManualExecutor{};
auto filter =
std::make_shared<WatchmanGlobFilter>(globs, CaseSensitivity::Sensitive);
auto filter = std::make_shared<GlobFilter>(globs, CaseSensitivity::Sensitive);
auto pass =
filter
->getFilterCoverageForPath(
@ -40,12 +39,11 @@ TEST(WatchmanGlobFilterTest, testGlobbingNotExists) {
EXPECT_EQ(std::move(pass).get(0ms), FilterCoverage::RECURSIVELY_FILTERED);
}
TEST(WatchmanGlobFilterTest, testGlobbingExists) {
TEST(GlobFilterTest, testGlobbingExists) {
std::vector<std::string> globs = {"*.rs"};
auto executor = folly::ManualExecutor{};
auto filter =
std::make_shared<WatchmanGlobFilter>(globs, CaseSensitivity::Sensitive);
auto filter = std::make_shared<GlobFilter>(globs, CaseSensitivity::Sensitive);
auto pass = filter
->getFilterCoverageForPath(
RelativePathPiece{"content2.rs"}, folly::StringPiece(""))
@ -56,13 +54,12 @@ TEST(WatchmanGlobFilterTest, testGlobbingExists) {
EXPECT_EQ(std::move(pass).get(0ms), FilterCoverage::UNFILTERED);
}
TEST(WatchmanGlobFilterTest, testAnother) {
TEST(GlobFilterTest, testAnother) {
std::vector<std::string> globs = {"tree3/README"};
auto executor = folly::ManualExecutor{};
auto filter =
std::make_shared<WatchmanGlobFilter>(globs, CaseSensitivity::Sensitive);
auto filter = std::make_shared<GlobFilter>(globs, CaseSensitivity::Sensitive);
auto pass = filter
->getFilterCoverageForPath(
RelativePathPiece{"tree3/README"}, folly::StringPiece(""))
@ -73,13 +70,12 @@ TEST(WatchmanGlobFilterTest, testAnother) {
EXPECT_EQ(std::move(pass).get(0ms), FilterCoverage::UNFILTERED);
}
TEST(WatchmanGlobFilterTest, testGlobs) {
TEST(GlobFilterTest, testGlobs) {
std::vector<std::string> globs = {"**/README"};
auto executor = folly::ManualExecutor{};
auto filter =
std::make_shared<WatchmanGlobFilter>(globs, CaseSensitivity::Sensitive);
auto filter = std::make_shared<GlobFilter>(globs, CaseSensitivity::Sensitive);
auto pass = filter
->getFilterCoverageForPath(
RelativePathPiece{"tree3/README"}, folly::StringPiece(""))
@ -89,3 +85,45 @@ TEST(WatchmanGlobFilterTest, testGlobs) {
EXPECT_EQ(std::move(pass).get(0ms), FilterCoverage::UNFILTERED);
}
TEST(GlobFilterTest, testComplexGlobs) {
std::vector<std::string> globs = {"a/b/**/README"};
auto executor = folly::ManualExecutor{};
auto filter = std::make_shared<GlobFilter>(globs, CaseSensitivity::Sensitive);
auto pass = filter
->getFilterCoverageForPath(
RelativePathPiece{"a/b/c"}, folly::StringPiece(""))
.semi()
.via(folly::Executor::getKeepAliveToken(executor));
executor.drain();
EXPECT_EQ(std::move(pass).get(), FilterCoverage::UNFILTERED);
auto notPass = filter
->getFilterCoverageForPath(
RelativePathPiece{"a/c/b.cpp"}, folly::StringPiece(""))
.semi()
.via(folly::Executor::getKeepAliveToken(executor));
executor.drain();
EXPECT_EQ(std::move(notPass).get(), FilterCoverage::RECURSIVELY_FILTERED);
}
TEST(GlobFilterTest, testFiltered) {
std::vector<std::string> globs = {"a/b/**/README"};
auto executor = folly::ManualExecutor{};
auto filter = std::make_shared<GlobFilter>(globs, CaseSensitivity::Sensitive);
auto pass = filter
->getFilterCoverageForPath(
RelativePathPiece{"a/d/c"}, folly::StringPiece(""))
.semi()
.via(folly::Executor::getKeepAliveToken(executor));
executor.drain();
EXPECT_EQ(std::move(pass).get(), FilterCoverage::RECURSIVELY_FILTERED);
}

View File

@ -14,7 +14,7 @@ cpp_unittest(
"//eden/fs/store:backing_store_interface",
"//eden/fs/store:store",
"//eden/fs/store/filter:filtered_object_id",
"//eden/fs/store/filter:filters",
"//eden/fs/store/filter:glob_filter",
"//eden/fs/telemetry:structured_logger",
"//eden/fs/telemetry:telemetry",
"//eden/fs/testharness:fake_backing_store_and_tree_builder",

View File

@ -30,4 +30,22 @@ void set_matcher_promise_result(
void set_matcher_promise_error(
std::unique_ptr<MatcherPromise> promise,
rust::String error);
// synchronized version of above class
class MatcherWrapper {
public:
explicit MatcherWrapper() = default;
std::unique_ptr<rust::Box<MercurialMatcher>> matcher_ = nullptr;
rust::String error_;
};
void set_matcher_result(
std::shared_ptr<MatcherWrapper> wrapper,
rust::Box<::facebook::eden::MercurialMatcher>);
void set_matcher_error(
std::shared_ptr<MatcherWrapper> wrapper,
rust::String error);
} // namespace facebook::eden

View File

@ -25,4 +25,19 @@ void set_matcher_promise_error(
std::runtime_error(std::move(error).c_str()));
return;
}
void set_matcher_result(
std::shared_ptr<MatcherWrapper> wrapper,
rust::Box<::facebook::eden::MercurialMatcher> matcher) {
wrapper->matcher_ =
std::make_unique<rust::Box<MercurialMatcher>>(std::move(matcher));
return;
}
void set_matcher_error(
std::shared_ptr<MatcherWrapper> wrapper,
rust::String error) {
wrapper->error_ = std::move(error);
return;
}
} // namespace facebook::eden

View File

@ -12,6 +12,7 @@ use std::str::FromStr;
use anyhow::anyhow;
use anyhow::Context;
use cxx::SharedPtr;
use cxx::UniquePtr;
use manifest::FileMetadata;
use manifest::FsNodeMetadata;
@ -19,16 +20,19 @@ use manifest::Manifest;
use once_cell::sync::Lazy;
use parking_lot::Mutex;
use pathmatcher::DirectoryMatch;
use pathmatcher::TreeMatcher;
use repo::repo::Repo;
use sparse::Matcher;
use sparse::Root;
use types::HgId;
use types::RepoPath;
use types::RepoPathBuf;
use crate::ffi::set_matcher_error;
use crate::ffi::set_matcher_promise_error;
use crate::ffi::set_matcher_promise_result;
use crate::ffi::set_matcher_result;
use crate::ffi::MatcherPromise;
use crate::ffi::MatcherWrapper;
static REPO_HASHMAP: Lazy<Mutex<HashMap<PathBuf, Repo>>> = Lazy::new(|| Mutex::new(HashMap::new()));
@ -72,12 +76,12 @@ impl FromStr for FilterId {
// Therefore, MercurialMatcher simply serves as a wrapper around the actual Matcher object that's
// passed to C++ and back to Rust
pub struct MercurialMatcher {
matcher: Matcher,
matcher: Box<dyn pathmatcher::Matcher>,
}
impl MercurialMatcher {
// Returns true if the given path and all of its children are unfiltered
fn is_recursively_unfiltered(
fn matches_directory(
self: &MercurialMatcher,
path: &str,
) -> Result<ffi::FilterDirectoryMatch, anyhow::Error> {
@ -85,9 +89,14 @@ impl MercurialMatcher {
// This is tricky -- a filter file defines which files should be *excluded* from the repo.
// The filtered files are put in the [exclude] section of the file. So, if something is
// recursively unfiltered, then it means that there are no exclude patterns that match it.
let res = pathmatcher::Matcher::matches_directory(&self.matcher, repo_path)?;
let res = self.matcher.matches_directory(repo_path)?;
Ok(res.into())
}
fn matches_file(self: &MercurialMatcher, path: &str) -> Result<bool, anyhow::Error> {
let repo_path = RepoPath::from_str(path)?;
self.matcher.matches_file(repo_path)
}
}
// It's safe to move MatcherPromises between threads
@ -112,6 +121,9 @@ mod ffi {
#[namespace = "facebook::eden"]
type MatcherPromise;
#[namespace = "facebook::eden"]
type MatcherWrapper;
#[namespace = "facebook::eden"]
fn set_matcher_promise_result(
promise: UniquePtr<MatcherPromise>,
@ -120,6 +132,12 @@ mod ffi {
#[namespace = "facebook::eden"]
fn set_matcher_promise_error(promise: UniquePtr<MatcherPromise>, error: String);
#[namespace = "facebook::eden"]
fn set_matcher_result(wrapper: SharedPtr<MatcherWrapper>, value: Box<MercurialMatcher>);
#[namespace = "facebook::eden"]
fn set_matcher_error(wrapper: SharedPtr<MatcherWrapper>, error: String);
}
#[namespace = "facebook::eden"]
@ -138,10 +156,16 @@ mod ffi {
) -> Result<()>;
// Returns true if the given path and all of its children are unfiltered.
fn is_recursively_unfiltered(
self: &MercurialMatcher,
path: &str,
) -> Result<FilterDirectoryMatch>;
fn matches_directory(self: &MercurialMatcher, path: &str) -> Result<FilterDirectoryMatch>;
// Returns true if the given path is unfiltered.
fn matches_file(self: &MercurialMatcher, path: &str) -> Result<bool>;
fn create_tree_matcher(
globs: Vec<String>,
case_sensitive: bool,
matcher_wrapper: SharedPtr<MatcherWrapper>,
) -> Result<()>;
}
}
@ -155,6 +179,22 @@ impl From<DirectoryMatch> for ffi::FilterDirectoryMatch {
}
}
fn create_tree_matcher(
globs: Vec<String>,
case_sensitive: bool,
matcher_wrapper: SharedPtr<MatcherWrapper>,
) -> Result<(), anyhow::Error> {
let matcher = TreeMatcher::from_rules(globs.iter(), case_sensitive)?;
let mercurial_matcher = Ok(Box::new(MercurialMatcher {
matcher: Box::new(matcher),
}));
match mercurial_matcher {
Ok(m) => set_matcher_result(matcher_wrapper, m),
Err(e) => set_matcher_error(matcher_wrapper, e),
};
Ok(())
}
// As mentioned below, we return the MercurialMatcher via a promise to circumvent some async
// limitations in CXX. This function wraps the bulk of the Sparse logic and provides a single
// place for returning result/error info via the MatcherPromise.
@ -236,7 +276,9 @@ fn _profile_contents_from_repo(
let matcher = root.matcher(|_| Ok(Some(vec![])))?;
Ok(matcher)
})?;
Ok(Box::new(MercurialMatcher { matcher }))
Ok(Box::new(MercurialMatcher {
matcher: Box::new(matcher),
}))
}
// CXX doesn't allow async functions to be exposed to C++. This function wraps the bulk of the