introduce HgFilterManager that uses Rust FFI for filtering

Reviewed By: xavierd

Differential Revision: D47161219

fbshipit-source-id: 68d5853cc7b9d34e34d26172bd9bdd719cc2107b
This commit is contained in:
Michael Cuevas 2023-10-13 13:39:44 -07:00 committed by Facebook GitHub Bot
parent 63e65d3d08
commit 6050656edc
24 changed files with 978 additions and 88 deletions

View File

@ -6,6 +6,8 @@
*/
#include "eden/fs/store/FilteredBackingStore.h"
#include <eden/fs/model/ObjectId.h>
#include <eden/fs/store/BackingStore.h>
#include <folly/Varint.h>
#include <stdexcept>
#include <tuple>
@ -24,22 +26,36 @@ FilteredBackingStore::FilteredBackingStore(
FilteredBackingStore::~FilteredBackingStore() {}
bool FilteredBackingStore::pathAffectedByFilterChange(
ImmediateFuture<bool> FilteredBackingStore::pathAffectedByFilterChange(
RelativePathPiece pathOne,
RelativePathPiece pathTwo,
folly::StringPiece filterIdOne,
folly::StringPiece filterIdTwo) {
auto pathOneIncluded = filter_->isPathFiltered(pathOne, filterIdOne);
auto pathTwoIncluded = filter_->isPathFiltered(pathTwo, filterIdTwo);
// If a path is in neither or both filters, then it wouldn't be affected by
// any change (it is present in both or absent in both).
if (pathOneIncluded == pathTwoIncluded) {
return false;
}
std::vector<ImmediateFuture<bool>> futures;
futures.emplace_back(filter_->isPathFiltered(pathOne, filterIdOne));
futures.emplace_back(filter_->isPathFiltered(pathTwo, filterIdTwo));
return collectAll(std::move(futures))
.thenValue([](std::vector<folly::Try<bool>>&& isFilteredVec) {
// If we're unable to get the results from either future, we throw.
if (!isFilteredVec[0].hasValue() || !isFilteredVec[1].hasValue()) {
throw std::runtime_error{fmt::format(
"Unable to determine if paths were affected by filter change: {}",
isFilteredVec[0].hasException()
? isFilteredVec[0].exception().what()
: isFilteredVec[1].exception().what())};
}
// If a path is in only 1 filter, it is affected by the change in some way.
// This function doesn't determine how, just that the path is affected.
return true;
// If a path is in neither or both filters, then it wouldn't be affected
// by any change (it is present in both or absent in both).
if (isFilteredVec[0].value() == isFilteredVec[1].value()) {
return false;
}
// If a path is in only 1 filter, it is affected by the change in some
// way. This function doesn't determine how, just that the path is
// affected.
return true;
});
}
std::tuple<RootId, std::string> parseFilterIdFromRootId(const RootId& rootId) {
@ -114,25 +130,32 @@ ObjectComparison FilteredBackingStore::compareObjectsById(
filteredTwo.path(),
filteredOne.filter(),
filteredTwo.filter());
if (pathAffected) {
return ObjectComparison::Different;
} else {
// If the path wasn't affected by the filter change, we still can't be
// sure whether a subdirectory of that path was affected. Therefore we
// must return unknown if the underlying BackingStore reports that the
// objects are the same.
//
// TODO: We could improve this in the future by noting whether a tree has
// any subdirectories that are affected by filters. There are many ways to
// do this, but all of them are tricky to do. Let's save this for future
// optimization.
auto res = backingStore_->compareObjectsById(
filteredOne.object(), filteredTwo.object());
if (res == ObjectComparison::Identical) {
return ObjectComparison::Unknown;
if (pathAffected.isReady()) {
if (std::move(pathAffected).get()) {
return ObjectComparison::Different;
} else {
return res;
// If the path wasn't affected by the filter change, we still can't be
// sure whether a subdirectory of that path was affected. Therefore we
// must return unknown if the underlying BackingStore reports that the
// objects are the same.
//
// TODO: We could improve this in the future by noting whether a tree
// has any subdirectories that are affected by filters. There are many
// ways to do this, but all of them are tricky to do. Let's save this
// for future optimization.
auto res = backingStore_->compareObjectsById(
filteredOne.object(), filteredTwo.object());
if (res == ObjectComparison::Identical) {
return ObjectComparison::Unknown;
} else {
return res;
}
}
} else {
// We can't immediately tell if the path is affected by the filter
// change. Instead of chaining the future and queueing up a bunch of work,
// we'll return Unknown early.
return ObjectComparison::Unknown;
}
} else {
@ -141,29 +164,70 @@ ObjectComparison FilteredBackingStore::compareObjectsById(
}
}
PathMap<TreeEntry> FilteredBackingStore::filterImpl(
ImmediateFuture<std::unique_ptr<PathMap<TreeEntry>>>
FilteredBackingStore::filterImpl(
const TreePtr unfilteredTree,
RelativePathPiece treePath,
folly::StringPiece filterId) {
auto pathMap = PathMap<TreeEntry>{unfilteredTree->getCaseSensitivity()};
auto isFilteredFutures =
std::vector<ImmediateFuture<std::pair<RelativePath, bool>>>{};
// The FilterID is passed through multiple futures. Let's create a copy and
// pass it around to avoid lifetime issues.
auto filter = filterId.toString();
for (const auto& [path, entry] : *unfilteredTree) {
auto relPath = RelativePath{treePath} + path;
if (!filter_->isPathFiltered(relPath.piece(), filterId)) {
ObjectId oid;
if (entry.getType() == TreeEntryType::TREE) {
auto foid =
FilteredObjectId(relPath.piece(), filterId, entry.getHash());
oid = ObjectId{foid.getValue()};
} else {
auto foid = FilteredObjectId{entry.getHash()};
oid = ObjectId{foid.getValue()};
}
auto treeEntry = TreeEntry{std::move(oid), entry.getType()};
auto pair = std::pair{path, std::move(treeEntry)};
pathMap.insert(std::move(pair));
}
// TODO(cuev): I need to ensure that relPath survives until all the tree
// entries are created. I think the best way to do this is with a
// unique_ptr?
auto relPath = RelativePath{treePath + path};
auto filteredRes = filter_->isPathFiltered(relPath, filter);
auto fut =
std::move(filteredRes)
.thenValue([relPath = std::move(relPath)](bool isFiltered) mutable {
return std::pair(std::move(relPath), isFiltered);
});
isFilteredFutures.emplace_back(std::move(fut));
}
return pathMap;
return collectAll(std::move(isFilteredFutures))
.thenValue(
[unfilteredTree, filterId = std::move(filter)](
std::vector<folly::Try<std::pair<RelativePath, bool>>>&&
isFilteredVec) -> std::unique_ptr<PathMap<TreeEntry>> {
// This PathMap will only contain tree entries that aren't filtered
auto pathMap =
PathMap<TreeEntry>{unfilteredTree->getCaseSensitivity()};
for (auto&& isFiltered : isFilteredVec) {
if (isFiltered.hasException()) {
XLOGF(
ERR,
"Failed to determine if entry should be filtered: {}",
isFiltered.exception().what());
continue;
}
// This entry is not filtered. Re-add it to the new PathMap.
if (!isFiltered->second) {
auto relPath = std::move(isFiltered->first);
auto entry = unfilteredTree->find(relPath.basename().piece());
auto entryType = entry->second.getType();
ObjectId oid;
if (entryType == TreeEntryType::TREE) {
auto foid = FilteredObjectId(
relPath.piece(), filterId, entry->second.getHash());
oid = ObjectId{foid.getValue()};
} else {
auto foid = FilteredObjectId{entry->second.getHash()};
oid = ObjectId{foid.getValue()};
}
auto treeEntry = TreeEntry{std::move(oid), entryType};
auto pair =
std::pair{relPath.basename().copy(), std::move(treeEntry)};
pathMap.insert(std::move(pair));
}
}
return std::make_unique<PathMap<TreeEntry>>(std::move(pathMap));
});
}
ImmediateFuture<BackingStore::GetRootTreeResult>
@ -176,21 +240,29 @@ FilteredBackingStore::getRootTree(
"Getting rootTree {} with filter {}",
parsedRootId.value(),
filterId);
return backingStore_->getRootTree(parsedRootId, context)
.thenValue([filterId = filterId,
self = shared_from_this()](GetRootTreeResult rootTreeResult) {
// apply the filter to the tree
auto pathMap =
self->filterImpl(rootTreeResult.tree, RelativePath{""}, filterId);
auto rootFOID =
FilteredObjectId{RelativePath{""}, filterId, rootTreeResult.treeId};
return GetRootTreeResult{
std::make_shared<const Tree>(
std::move(pathMap), ObjectId{rootFOID.getValue()}),
ObjectId{rootFOID.getValue()},
};
});
auto fut = backingStore_->getRootTree(parsedRootId, context);
return std::move(fut).thenValue([filterId = std::move(filterId),
self = shared_from_this()](
GetRootTreeResult
rootTreeResult) mutable {
// apply the filter to the tree
auto filterFut =
self->filterImpl(rootTreeResult.tree, RelativePath{""}, filterId);
return std::move(filterFut).thenValue(
[self,
filterId = std::move(filterId),
treeId = std::move(rootTreeResult.treeId)](
std::unique_ptr<PathMap<TreeEntry>> pathMap) {
auto rootFOID = FilteredObjectId{RelativePath{""}, filterId, treeId};
auto res = GetRootTreeResult{
std::make_shared<const Tree>(
std::move(*pathMap), ObjectId{rootFOID.getValue()}),
ObjectId{rootFOID.getValue()},
};
pathMap.reset();
return res;
});
});
}
ImmediateFuture<std::shared_ptr<TreeEntry>>
@ -211,11 +283,17 @@ folly::SemiFuture<BackingStore::GetTreeResult> FilteredBackingStore::getTree(
return std::move(unfilteredTree)
.deferValue([self = shared_from_this(),
filteredId = std::move(filteredId)](GetTreeResult&& result) {
auto pathMap = self->filterImpl(
auto filterRes = self->filterImpl(
result.tree, filteredId.path(), filteredId.filter());
auto tree = std::make_shared<Tree>(
std::move(pathMap), ObjectId{filteredId.getValue()});
return GetTreeResult{std::move(tree), result.origin};
return std::move(filterRes)
.thenValue([filteredId, origin = result.origin](
std::unique_ptr<PathMap<TreeEntry>> pathMap) {
auto tree = std::make_shared<Tree>(
std::move(*pathMap), ObjectId{filteredId.getValue()});
pathMap.reset();
return GetTreeResult{std::move(tree), origin};
})
.semi();
});
}

View File

@ -130,7 +130,7 @@ class FilteredBackingStore
/*
* Does the actual filtering logic for tree and root-tree objects.
*/
PathMap<TreeEntry> filterImpl(
ImmediateFuture<std::unique_ptr<PathMap<TreeEntry>>> filterImpl(
const TreePtr unfilteredTree,
RelativePathPiece treePath,
folly::StringPiece filterId);
@ -139,7 +139,7 @@ class FilteredBackingStore
* Determine whether a path is affected by a filter change from One -> Two or
* vice versa.
*/
bool pathAffectedByFilterChange(
ImmediateFuture<bool> pathAffectedByFilterChange(
RelativePathPiece pathOne,
RelativePathPiece pathTwo,
folly::StringPiece filterIdOne,

View File

@ -3,17 +3,26 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# TODO(cuev):
# This is the start of what's needed to support HgSparseFilters in the OSS
# build. However, I ran into the issue of needing to make CMake understand
# where to locate rust/cxx.h. This would require significant time, so it's
# now a problem for another time. It's not critical that we build this in
# our open source build just yet.
file(GLOB STORE_FILTER_SRCS "*.cpp")
list(
REMOVE_ITEM STORE_FILTER_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/HgSparseFilter.cpp"
)
add_library(
eden_store_filter STATIC
${STORE_FILTER_SRCS}
)
target_link_libraries(
eden_store_filter
PUBLIC
target_link_libraries(eden_store_filter PUBLIC
eden_config
eden_model
eden_utils
${RE2_LIBRARY}
)
# hg_eden_ffi
${RE2_LIBRARY})

View File

@ -9,6 +9,7 @@
#include <folly/Range.h>
#include "eden/fs/utils/ImmediateFuture.h"
#include "eden/fs/utils/PathFuncs.h"
// A null filter indicates that nothing should be filtered (i.e. no filter is
@ -24,7 +25,7 @@ class Filter {
/*
* Checks whether a path is filtered by the given filter.
*/
virtual bool isPathFiltered(
virtual ImmediateFuture<bool> isPathFiltered(
RelativePathPiece path,
folly::StringPiece filterId) = 0;
};

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/fs/store/filter/HgSparseFilter.h"
#include "eden/fs/model/Hash.h"
#include "eden/fs/service/ThriftUtil.h"
#include "eden/scm/lib/edenfs-ffi/src/lib.rs.h" // @manual
#include <folly/futures/Future.h>
#include <exception>
#include <memory>
#include <stdexcept>
#include <string>
namespace facebook::eden {
namespace {
/**
 * Rewrites a FilterId of the form "<filter path>:<commit id>" so that the
 * commit id portion is whatever hash20FromThrift() produces for it, rendered
 * through fmt.
 *
 * The literal id "null" is returned unchanged.
 *
 * @param filterId the raw FilterId handed to the filter.
 * @return "<filter path>:<formatted commit id>", or "null".
 * @throws std::invalid_argument if a non-null id contains no ':' separator.
 *         Anything hash20FromThrift() throws for the commit id portion is
 *         propagated as-is.
 */
std::string parseFilterId(folly::StringPiece filterId) {
  if (filterId == "null") {
    return filterId.str();
  }

  auto separatorIdx = filterId.find(":");
  if (separatorIdx == std::string::npos) {
    // Without this guard, `separatorIdx + 1` wraps around to 0 and the whole
    // id would be used both as the filter path and as the commit id.
    throw std::invalid_argument(fmt::format(
        "Invalid filter id \"{}\": expected \"<filter path>:<commit id>\"",
        filterId));
  }

  auto commitId = hash20FromThrift(filterId.subpiece(separatorIdx + 1));
  return fmt::format("{}:{}", filterId.subpiece(0, separatorIdx), commitId);
}
} // namespace
/*
 * Checks whether `path` is excluded by the sparse profile identified by `id`.
 *
 * If a profile for the parsed filter id is already cached, the answer is
 * returned as a ready ImmediateFuture. Otherwise the profile is requested via
 * profile_from_filter_id(), stored in the cache once it arrives, and the
 * returned future resolves to the result of is_path_excluded() on it.
 *
 * Exceptions thrown by parseFilterId() propagate synchronously to the caller.
 */
ImmediateFuture<bool> HgSparseFilter::isPathFiltered(
    RelativePathPiece path,
    folly::StringPiece id) {
  // We check if the filter is cached. If so, we can avoid fetching the Filter
  // Profile from Mercurial.
  auto parsedFilterId = parseFilterId(id);
  {
    // The read lock has to stay held for as long as the iterator is in use:
    // the LockedPtr must not be dereferenced after unlock(), and a concurrent
    // writer inserting into the map could invalidate the iterator. The lock is
    // released when `profiles` goes out of scope at the end of this block.
    //
    // TODO(cuev): The lock is purposely not held across the fetch below.
    // This can lead to multiple threads fetching and adding the same profile
    // to the cache, but it should be a one time occurrence per FilterId.
    auto profiles = profiles_->rlock();
    auto profileIt = profiles->find(parsedFilterId);
    if (profileIt != profiles->end()) {
      return ImmediateFuture<bool>(
          profileIt->second->is_path_excluded(path.asString()));
    }
  }

  XLOGF(DBG8, "New filter id {}. Fetching from Mercurial.", id);
  auto filterId = rust::Str{parsedFilterId.data(), parsedFilterId.size()};
  auto pathToMount =
      rust::Str{checkoutPath_.view().data(), checkoutPath_.view().size()};
  auto [promise, rootFuture] =
      folly::makePromiseContract<rust::Box<SparseProfileRoot>>();
  auto rootPromise = std::make_shared<RootPromise>(std::move(promise));
  profile_from_filter_id(filterId, pathToMount, std::move(rootPromise));
  return ImmediateFuture{
      std::move(rootFuture)
          .deferValue(
              [filterId = std::move(parsedFilterId),
               path = path.copy(),
               profilesLock = profiles_](rust::Box<SparseProfileRoot>&& res) {
                // try_emplace keeps an already-cached profile if another
                // caller inserted one for the same id in the meantime.
                auto profiles = profilesLock->wlock();
                auto [profileIt, _] =
                    profiles->try_emplace(filterId, std::move(res));
                return profileIt->second->is_path_excluded(path.asString());
              })};
}
} // namespace facebook::eden

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#pragma once
#include <folly/container/F14Map.h>
#include <folly/logging/xlog.h>
#include <rust/cxx.h>
#include <memory>
#include <string>
#include "eden/fs/store/filter/Filter.h"
#include "eden/scm/lib/edenfs-ffi/src/ffi.h"
#include "eden/scm/lib/edenfs-ffi/src/lib.rs.h" // @manual
namespace facebook::eden {
// Extern "Rust"
struct SparseProfileRoot;
/**
 * A Filter whose decisions come from SparseProfileRoot objects obtained over
 * the Rust FFI. Profiles are kept in a synchronized map keyed by string so
 * that each one only needs to be fetched once.
 */
class HgSparseFilter : public Filter {
 public:
  /**
   * @param checkoutPath absolute path stored for later use when profiles are
   *        requested.
   */
  explicit HgSparseFilter(AbsolutePath checkoutPath)
      : profiles_(
            std::make_shared<folly::Synchronized<SparseMatcherMap>>(
                std::in_place)),
        checkoutPath_(std::move(checkoutPath)) {}

  ~HgSparseFilter() override = default;

  /*
   * Checks whether a path is filtered by the given filter.
   */
  ImmediateFuture<bool> isPathFiltered(
      RelativePathPiece path,
      folly::StringPiece filterId) override;

 private:
  // TODO(cuev): We may want to use a F14FastMap instead since it doesn't matter
  // if the string or rust::Box are moved. We'll hold off on investigating for
  // now since in the future we may store a Matcher in the map instead of a
  // SparseProfileRoot object. See fbcode/folly/container/F14.md for more info.
  using SparseMatcherMap = folly::
      F14NodeMap<std::string, rust::Box<facebook::eden::SparseProfileRoot>>;

  // Shared (rather than owned directly) so the cache can be captured by value
  // in callbacks.
  std::shared_ptr<folly::Synchronized<SparseMatcherMap>> profiles_;
  AbsolutePath checkoutPath_;
};
} // namespace facebook::eden

View File

@ -21,7 +21,34 @@ cpp_library(
name = "filters",
headers = ["Filter.h"],
exported_deps = [
"//eden/fs/utils:immediate_future",
"//eden/fs/utils:path",
"//folly:range",
],
)
cpp_library(
name = "hg_sparse_filter",
srcs = ["HgSparseFilter.cpp"],
headers = ["HgSparseFilter.h"],
os_deps = [(
"windows",
["fbsource//third-party/rust:cxx"],
)],
preferred_linkage = "static",
undefined_symbols = True,
deps = [
"//eden/fs/model:model",
"//eden/fs/service:thrift_util",
"//folly/futures:core",
],
exported_deps = [
"fbsource//third-party/rust:cxx-core",
":filters",
"//eden/scm/lib/edenfs-ffi:edenfs-ffi", # @manual
"//eden/scm/lib/edenfs-ffi:edenfs-ffi-wrapper",
"//eden/scm/lib/edenfs-ffi:edenfs-ffi@header", # @manual
"//folly/container:f14_hash",
"//folly/logging:logging",
],
)

View File

@ -4,6 +4,8 @@
# GNU General Public License version 2.
file(GLOB STORE_TEST_SRCS "*Test.cpp")
list(REMOVE_ITEM STORE_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/FilteredBackingStoreTest.cpp)
add_executable(
eden_store_test
${STORE_TEST_SRCS}

View File

@ -15,9 +15,17 @@
#include <folly/portability/GTest.h>
#include <folly/test/TestUtils.h>
#include "eden/fs/config/ReloadableConfig.h"
#include "eden/fs/model/TestOps.h"
#include "eden/fs/store/BackingStoreLogger.h"
#include "eden/fs/store/FilteredBackingStore.h"
#include "eden/fs/store/MemoryLocalStore.h"
#include "eden/fs/store/filter/HgSparseFilter.h"
#include "eden/fs/store/hg/HgImporter.h"
#include "eden/fs/store/hg/HgQueuedBackingStore.h"
#include "eden/fs/telemetry/NullStructuredLogger.h"
#include "eden/fs/testharness/FakeFilter.h"
#include "eden/fs/testharness/HgRepo.h"
#include "eden/fs/testharness/TestUtil.h"
#include "eden/fs/utils/PathFuncs.h"
@ -32,8 +40,45 @@ const char kTestFilter2[] = "football2";
const char kTestFilter3[] = "football3";
const char kTestFilter4[] = "shouldFilterZeroObjects";
const char kTestFilter5[] = "bazbar";
const char kTestFilter6[] =
"\
[include]\n\
*\n\
[exclude]\n\
foo\n\
dir2/README\n\
filtered_out";
class FilteredBackingStoreTest : public ::testing::Test {
// Test fixture base that builds a real hg repository in a temporary directory
// and records the id and manifest of its single commit.
//
// The first three members are declared in dependency order: testPath is
// derived from testDir, and repo is rooted under testPath.
struct TestRepo {
folly::test::TemporaryDirectory testDir{"eden_filtered_backing_store_test"};
AbsolutePath testPath = canonicalPath(testDir.path().string());
HgRepo repo{testPath + "repo"_pc};
// Id of the commit created by the constructor.
RootId commit1;
// Manifest hash reported for commit1.
Hash20 manifest1;
// Initializes the repo, writes the test files (including the filter
// definition itself under "filter"), adds them and commits.
TestRepo() {
repo.hgInit(testPath + "cache"_pc);
// Filtered out by kTestFilter6
repo.mkdir("foo");
repo.writeFile("foo/bar.txt", "filtered out\n");
repo.mkdir("dir2");
repo.writeFile("dir2/README", "filtered out again\n");
repo.writeFile("filtered_out", "filtered out last\n");
// Not filtered out by kTestFilter6
repo.mkdir("src");
repo.writeFile("src/hello.txt", "world\n");
repo.writeFile("foo.txt", "foo\n");
repo.writeFile("bar.txt", "bar\n");
// The filter contents are committed as a regular file named "filter".
repo.writeFile("filter", kTestFilter6);
repo.hg("add");
commit1 = repo.commit("Initial commit");
manifest1 = repo.getManifestForCommit(commit1);
}
};
class FakeFilteredBackingStoreTest : public ::testing::Test {
protected:
void SetUp() override {
wrappedStore_ = std::make_shared<FakeBackingStore>();
@ -50,6 +95,45 @@ class FilteredBackingStoreTest : public ::testing::Test {
std::shared_ptr<FilteredBackingStore> filteredStore_;
};
// Fixture that layers a FilteredBackingStore (using HgSparseFilter) on top of
// an HgQueuedBackingStore backed by the repository created by TestRepo.
//
// Members are initialized in declaration order, which matters here:
// `importer` must exist before `backingStore` takes its address, and
// `backingStore` is moved into `wrappedStore_` (so it is empty afterwards).
struct HgFilteredBackingStoreTest : TestRepo, ::testing::Test {
HgFilteredBackingStoreTest() {}
// Builds the store under test; the filter is given a copy of the repo path.
void SetUp() override {
auto hgFilter = std::make_unique<HgSparseFilter>(repo.path().copy());
filteredStoreFFI_ = std::make_shared<FilteredBackingStore>(
wrappedStore_, std::move(hgFilter));
}
// Drops this fixture's reference to the store under test.
void TearDown() override {
filteredStoreFFI_.reset();
}
std::shared_ptr<ReloadableConfig> edenConfig{
std::make_shared<ReloadableConfig>(EdenConfig::createTestEdenConfig())};
EdenStatsPtr stats{makeRefPtr<EdenStats>()};
std::shared_ptr<MemoryLocalStore> localStore{
std::make_shared<MemoryLocalStore>(stats.copy())};
HgImporter importer{repo.path(), stats.copy()};
// Null until SetUp() runs.
std::shared_ptr<FilteredBackingStore> filteredStoreFFI_;
std::unique_ptr<HgBackingStore> backingStore{std::make_unique<HgBackingStore>(
repo.path(),
&importer,
edenConfig,
localStore,
stats.copy())};
// Takes ownership of `backingStore` above.
std::shared_ptr<HgQueuedBackingStore> wrappedStore_{
std::make_shared<HgQueuedBackingStore>(
localStore,
stats.copy(),
std::move(backingStore),
edenConfig,
std::make_shared<NullStructuredLogger>(),
std::make_unique<BackingStoreLogger>())};
};
/**
* Helper function to get blob contents as a string.
*
@ -61,7 +145,7 @@ std::string blobContents(const Blob& blob) {
return c.readFixedString(blob.getContents().computeChainDataLength());
}
TEST_F(FilteredBackingStoreTest, getNonExistent) {
TEST_F(FakeFilteredBackingStoreTest, getNonExistent) {
// getRootTree()/getTree()/getBlob() should throw immediately
// when called on non-existent objects.
EXPECT_THROW_RE(
@ -88,7 +172,7 @@ TEST_F(FilteredBackingStoreTest, getNonExistent) {
"tree 0.*1 not found");
}
TEST_F(FilteredBackingStoreTest, getBlob) {
TEST_F(FakeFilteredBackingStoreTest, getBlob) {
// Add a blob to the tree
auto hash = makeTestHash("1");
auto filteredHash = ObjectId{FilteredObjectId{hash}.getValue()};
@ -156,7 +240,7 @@ TEST_F(FilteredBackingStoreTest, getBlob) {
EXPECT_EQ("foobar", blobContents(*std::move(future6).get(0ms).blob));
}
TEST_F(FilteredBackingStoreTest, getTree) {
TEST_F(FakeFilteredBackingStoreTest, getTree) {
// Populate some files in the store
auto [runme, runme_id] =
wrappedStore_->putBlob("#!/bin/sh\necho 'hello world!'\n");
@ -284,7 +368,7 @@ TEST_F(FilteredBackingStoreTest, getTree) {
EXPECT_EQ(treeOID, std::move(future5).get(0ms).tree->getHash());
}
TEST_F(FilteredBackingStoreTest, getRootTree) {
TEST_F(FakeFilteredBackingStoreTest, getRootTree) {
// Set up one commit with a root tree
auto dir1Hash = makeTestHash("abc");
auto dir1FOID = FilteredObjectId(RelativePath{""}, kTestFilter1, dir1Hash);
@ -356,7 +440,7 @@ TEST_F(FilteredBackingStoreTest, getRootTree) {
"tree .* for commit .* not found");
}
TEST_F(FilteredBackingStoreTest, testCompareBlobObjectsById) {
TEST_F(FakeFilteredBackingStoreTest, testCompareBlobObjectsById) {
// Populate some blobs for testing.
//
// NOTE: FakeBackingStore is very dumb and implements its
@ -477,7 +561,7 @@ TEST_F(FilteredBackingStoreTest, testCompareBlobObjectsById) {
ObjectComparison::Identical);
}
TEST_F(FilteredBackingStoreTest, testCompareTreeObjectsById) {
TEST_F(FakeFilteredBackingStoreTest, testCompareTreeObjectsById) {
// Populate some blobs for testing.
//
// NOTE: FakeBackingStore is very dumb and implements its
@ -597,4 +681,45 @@ TEST_F(FilteredBackingStoreTest, testCompareTreeObjectsById) {
filteredStore_->compareObjectsById(grandchildOID, grandchildOID2) ==
ObjectComparison::Unknown);
}
const auto kTestTimeout = 10s;
// End-to-end check that trees fetched through FilteredBackingStore with an
// HgSparseFilter omit the entries excluded by the filter file committed in
// commit1, and keep everything else.
TEST_F(HgFilteredBackingStoreTest, testMercurialFFI) {
  // Fetch the root tree of commit1, filtered by the "filter" file as it
  // exists in that same commit.
  auto filterRelPath = RelativePath{"filter"};
  auto rootFuture1 = filteredStoreFFI_->getRootTree(
      RootId{FilteredBackingStore::createFilteredRootId(
          commit1.value(),
          fmt::format("{}:{}", filterRelPath.piece(), commit1.value()))},
      ObjectFetchContext::getNullContext());

  auto rootDirRes = std::move(rootFuture1).get(kTestTimeout);

  // The dir2 and src trees are needed below. Make sure they were found before
  // dereferencing, so that a filtering regression fails the test instead of
  // dereferencing an end iterator.
  auto dir2FindRes = rootDirRes.tree->find("dir2"_pc);
  ASSERT_NE(dir2FindRes, rootDirRes.tree->cend());
  auto srcFindRes = rootDirRes.tree->find("src"_pc);
  ASSERT_NE(srcFindRes, rootDirRes.tree->cend());
  const auto& dir2Entry = dir2FindRes->second;
  const auto& srcEntry = srcFindRes->second;

  // Look up the remaining root-level entries.
  auto fooTxtFindRes = rootDirRes.tree->find("foo.txt"_pc);
  auto barTxtFindRes = rootDirRes.tree->find("bar.txt"_pc);
  auto fooFindRes = rootDirRes.tree->find("foo"_pc);
  auto filteredOutFindRes = rootDirRes.tree->find("filtered_out"_pc);

  // Get all the files from the trees from commit 1.
  auto dir2Future = filteredStoreFFI_->getTree(
      dir2Entry.getHash(), ObjectFetchContext::getNullContext());
  auto dir2Res = std::move(dir2Future).get(kTestTimeout).tree;
  auto readmeFindRes = dir2Res->find("README"_pc);

  auto srcFuture = filteredStoreFFI_->getTree(
      srcEntry.getHash(), ObjectFetchContext::getNullContext());
  auto srcRes = std::move(srcFuture).get(kTestTimeout).tree;
  auto helloFindRes = srcRes->find("hello.txt"_pc);

  // We expect these files to be filtered
  EXPECT_EQ(fooFindRes, rootDirRes.tree->cend());
  EXPECT_EQ(readmeFindRes, dir2Res->cend());
  EXPECT_EQ(filteredOutFindRes, rootDirRes.tree->cend());

  // We expect these files to be present
  EXPECT_NE(fooTxtFindRes, rootDirRes.tree->cend());
  EXPECT_NE(barTxtFindRes, rootDirRes.tree->cend());
  EXPECT_NE(helloFindRes, srcRes->cend());
}
} // namespace

View File

@ -22,12 +22,24 @@ cpp_unittest(
srcs = [
"FilteredBackingStoreTest.cpp",
],
# TODO(T159481899): make HgRepo compile/run on Windows
compatible_with = [
"ovr_config//os:linux",
"ovr_config//os:macos",
],
supports_static_listing = False,
deps = [
"//eden/fs/config:config",
"//eden/fs/model:testutil",
"//eden/fs/store:filtered_backing_store",
"//eden/fs/store:store",
"//eden/fs/store/filter:hg_sparse_filter",
"//eden/fs/store/hg:hg_importer",
"//eden/fs/store/hg:hg_queued_backing_store",
"//eden/fs/telemetry:structured_logger",
"//eden/fs/testharness:fake_backing_store_and_tree_builder",
"//eden/fs/testharness:fake_filter",
"//eden/fs/testharness:hg_repo",
"//eden/fs/testharness:test_util",
"//eden/fs/utils:path",
"//folly:varint",

View File

@ -21,9 +21,11 @@ class FakeFilter final : public Filter {
/*
* Checks whether a path is filtered by the given filter.
*/
bool isPathFiltered(RelativePathPiece path, folly::StringPiece filterId)
override {
return path.view().find(filterId) != std::string::npos;
ImmediateFuture<bool> isPathFiltered(
RelativePathPiece path,
folly::StringPiece filterId) override {
return ImmediateFuture<bool>{
path.view().find(filterId) != std::string::npos};
}
};
} // namespace facebook::eden

View File

@ -171,6 +171,17 @@ Hash20 HgRepo::getManifestForCommit(const RootId& commit) {
return Hash20{folly::rtrimWhitespace(output)};
}
// Asks hg (via `dbsh -c`) to print the hex filenode of `repoRelPath` as it
// exists in `commit`, and returns it as a Hash20. Trailing whitespace in the
// command output is stripped before parsing.
Hash20 HgRepo::getHgIdForFile(
    const RootId& commit,
    RelativePathPiece repoRelPath) {
  auto script = fmt::format(
      "print(hex(repo['{}']['{}'].filenode()))",
      commit.value(),
      repoRelPath.asString());
  auto rawOutput = hg("dbsh", "-c", std::move(script));
  auto trimmed = folly::rtrimWhitespace(rawOutput);
  return Hash20{trimmed};
}
void HgRepo::mkdir(RelativePathPiece path, mode_t permissions) {
auto fullPath = path_ + path;
auto rc = ::mkdir(fullPath.value().c_str(), permissions);

View File

@ -100,6 +100,7 @@ class HgRepo {
RootId commit(folly::StringPiece message);
Hash20 getManifestForCommit(const RootId& commit);
Hash20 getHgIdForFile(const RootId& commit, RelativePathPiece repoRelPath);
void mkdir(RelativePathPiece path, mode_t permissions = 0755);
void mkdir(folly::StringPiece path, mode_t permissions = 0755) {

View File

@ -21,6 +21,7 @@ cpp_library(
"//eden/fs/journal:journal",
"//eden/fs/notifications:command_notifier",
"//eden/fs/service:pretty_printers",
"//eden/fs/store:backing_store_interface",
"//eden/fs/store:store",
"//eden/fs/telemetry:hive_logger",
"//eden/fs/telemetry:structured_logger",
@ -193,8 +194,8 @@ cpp_library(
exported_deps = [
":stored_object",
"//eden/fs/model:model",
"//eden/fs/store:backing_store_interface",
"//eden/fs/store:context",
"//eden/fs/store:store",
"//eden/fs/utils:path",
"//eden/fs/utils:utils",
"//folly:exception_wrapper",
@ -266,7 +267,7 @@ cpp_library(
"//eden/fs/config:config",
"//eden/fs/service:server",
"//eden/fs/service:startup_logger",
"//eden/fs/store:store",
"//eden/fs/store:backing_store_interface",
"//eden/fs/telemetry:activity_recorder",
"//eden/fs/telemetry:hive_logger",
"//eden/fs/telemetry:log_info",

View File

@ -50,6 +50,7 @@ members = [
"lib/edenapi/types",
"lib/edenapi/types/proc_macros",
"lib/edenfs-client",
"lib/edenfs-ffi",
"lib/encoding",
"lib/exchange",
"lib/formatter",

View File

@ -0,0 +1,82 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
find_program(CARGO_COMMAND cargo REQUIRED)
set(crate_name edenfs-ffi)
if(DEFINED ARG_FEATURES)
set(cargo_flags build
$<IF:$<CONFIG:Debug>,,--release>
-p ${crate_name}
--features ${ARG_FEATURES})
else()
set(cargo_flags build $<IF:$<CONFIG:Debug>,,--release> -p ${crate_name})
endif()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(TARGET_DIR "debug")
else ()
set(TARGET_DIR "release")
endif ()
if(USE_CARGO_VENDOR)
set(extra_cargo_env "CARGO_HOME=${RUST_CARGO_HOME}")
set(cargo_flags ${cargo_flags})
endif()
set(CARGO_MANIFEST ${CMAKE_SOURCE_DIR}/eden/scm/lib/edenfs-ffi/Cargo.toml)
set(CARGO_TARGET_DIR ${CMAKE_SOURCE_DIR}/eden/scm/lib/edenfs-ffi/target)
set(FFI_SOURCE_FILE ${CMAKE_SOURCE_DIR}/eden/scm/lib/edenfs-ffi/src/lib.rs)
set(FFI_BRIDGE_CPP ${CARGO_TARGET_DIR}/cxxbridge/edenfs-ffi/src/lib.rs.cc)
set(FFI_LIB ${CARGO_TARGET_DIR}/${TARGET_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}edenfs-ffi${CMAKE_STATIC_LIBRARY_SUFFIX})
# Add a custom command that builds the rust crate and generates C++ bridge code
add_custom_command(
OUTPUT ${FFI_BRIDGE_CPP} ${FFI_LIB}
COMMAND ${extra_cargo_env}
${CARGO_COMMAND}
${cargo_flags}
--manifest-path ${CARGO_MANIFEST}
DEPENDS ${FFI_SOURCE_FILE}
USES_TERMINAL
COMMENT "Running cargo..."
)
file(GLOB STORE_FFI_SRCS ${FFI_BRIDGE_CPP})
add_library(
hg_eden_ffi STATIC
${FFI_BRIDGE_CPP}
)
target_include_directories(
hg_eden_ffi
PRIVATE
include/
${CARGO_TARGET_DIR}/eden/scm/lib/edenfs-ffi/src/
)
target_link_libraries(hg_eden_ffi PUBLIC
eden_model
eden_utils
${RE2_LIBRARY}
${FFI_LIB})
set_target_properties(
hg_eden_ffi
PROPERTIES ADDITIONAL_CLEAN_FILES ${CARGO_TARGET_DIR}
)
# Windows-only configuration
if(WIN32)
target_link_libraries(hg_eden_ffi userenv ws2_32 bcrypt)
set_target_properties(
hg_eden_ffi
PROPERTIES
MSVC_RUNTIME_LIBRARY "MultiThreadedDLL"
RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}
RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}
)
endif()

View File

@ -0,0 +1,26 @@
# @generated by autocargo
[package]
name = "edenfs-ffi"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["lib", "staticlib"]
[dependencies]
anyhow = "=1.0.72"
async-runtime = { version = "0.1.0", path = "../async-runtime" }
cxx = "1.0.100"
futures = { version = "0.3.28", features = ["async-await", "compat"] }
identity = { version = "0.1.0", path = "../identity" }
manifest = { version = "0.1.0", path = "../manifest" }
manifest-tree = { version = "0.1.0", path = "../manifest-tree" }
once_cell = "1.12"
repo = { version = "0.1.0", path = "../repo" }
sparse = { version = "0.1.0", path = "../sparse" }
tokio = { version = "1.29.1", features = ["full", "test-util", "tracing"] }
types = { version = "0.1.0", path = "../types" }
[build-dependencies]
cxx-build = "1.0.100"

View File

@ -0,0 +1,54 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbcode_macros//build_defs:rust_library.bzl", "rust_library")
oncall("source_control")
rust_library(
name = "edenfs-ffi",
srcs = glob(["src/**/*.rs"]),
autocargo = {
"cargo_target_config": {
"crate_type": [
"staticlib",
"lib",
],
},
"cargo_toml_config": {
"dependencies_override": {
"build-dependencies": {
"cxx-build": {"optional": False},
},
},
},
},
cpp_deps = [":edenfs-ffi-wrapper"],
crate_root = "src/lib.rs",
cxx_bridge = "src/lib.rs",
deps = [
"fbsource//third-party/rust:anyhow",
"fbsource//third-party/rust:cxx",
"fbsource//third-party/rust:futures",
"fbsource//third-party/rust:once_cell",
"fbsource//third-party/rust:tokio",
"//eden/scm/lib/async-runtime:async-runtime",
"//eden/scm/lib/identity:identity",
"//eden/scm/lib/manifest:manifest",
"//eden/scm/lib/manifest-tree:manifest-tree",
"//eden/scm/lib/repo:repo",
"//eden/scm/lib/sparse:sparse",
"//eden/scm/lib/types:types",
],
)
cpp_library(
name = "edenfs-ffi-wrapper",
srcs = ["src/ffi.cpp"],
headers = ["src/ffi.h"],
preferred_linkage = "static",
undefined_symbols = True,
exported_deps = [
"fbsource//third-party/rust:cxx-core",
":edenfs-ffi@header", # @manual
"//folly/futures:core",
],
)

View File

@ -0,0 +1,11 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
/// Build script: runs the cxx bridge generator over the crate root and tells
/// Cargo to re-run this script whenever that file changes.
fn main() {
    const BRIDGE_SOURCE: &str = "src/lib.rs";

    // The builder returned by `bridge` is intentionally discarded; nothing is
    // compiled from here. NOTE(review): presumably the generated C++ is picked
    // up and compiled by the outer (CMake/Buck) build - confirm.
    let _ = cxx_build::bridge(BRIDGE_SOURCE);

    println!("cargo:rerun-if-changed={BRIDGE_SOURCE}");
}

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/scm/lib/edenfs-ffi/src/ffi.h"
#include <memory>
#include <utility>
namespace facebook::eden {
// Fulfils the promise wrapped by `rootPromise` with `root`.
void set_root_promise_result(
    std::shared_ptr<RootPromise> rootPromise,
    rust::Box<SparseProfileRoot> root) {
  auto& promise = rootPromise->promise;
  promise.setValue(std::move(root));
}
void set_root_promise_error(
std::shared_ptr<RootPromise> rootPromise,
rust::String error) {
rootPromise->promise.setException(
std::runtime_error(std::move(error).c_str()));
return;
}
} // namespace facebook::eden

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#pragma once
#include <folly/futures/Future.h>
#include <memory>
#include "rust/cxx.h"
namespace facebook::eden {
// Forward declaration of the Rust type exposed through the cxx bridge. C++
// code in this header only ever handles it through rust::Box.
struct SparseProfileRoot;
// Thin wrapper around a folly::Promise that yields a boxed SparseProfileRoot.
// The free functions below take it by std::shared_ptr and complete the
// promise with either a value or an exception.
class RootPromise {
public:
// Takes ownership of `root` by moving it into the `promise` member.
explicit RootPromise(folly::Promise<rust::Box<SparseProfileRoot>> root)
: promise(std::move(root)) {}
// The promise completed by set_root_promise_result / set_root_promise_error.
folly::Promise<rust::Box<SparseProfileRoot>> promise;
};
// Completes `promise` successfully with the given boxed SparseProfileRoot.
void set_root_promise_result(
std::shared_ptr<RootPromise> promise,
rust::Box<::facebook::eden::SparseProfileRoot>);
// Completes `promise` with an exception carrying the message in `error`.
void set_root_promise_error(
std::shared_ptr<RootPromise> promise,
rust::String error);
} // namespace facebook::eden

View File

@ -0,0 +1,245 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
use std::collections::HashMap;
use std::fmt;
use std::path::PathBuf;
use std::str::FromStr;
use anyhow::anyhow;
use anyhow::Context;
use async_runtime::spawn;
use async_runtime::spawn_blocking;
use cxx::SharedPtr;
use futures::StreamExt;
use manifest::FileMetadata;
use manifest::FsNodeMetadata;
use manifest::Manifest;
use manifest_tree::TreeManifest;
use once_cell::sync::Lazy;
use repo::repo::Repo;
use sparse::Root;
use tokio::sync::Mutex;
use types::HgId;
use types::Key;
use types::RepoPathBuf;
use crate::ffi::set_root_promise_error;
use crate::ffi::set_root_promise_result;
use crate::ffi::RootPromise;
/// Process-wide cache of loaded `Repo` objects keyed by repository path. It is
/// created empty on first access and guarded by an async mutex.
static REPO_HASHMAP: Lazy<Mutex<HashMap<PathBuf, Repo>>> = Lazy::new(Default::default);
/// A helper type to parse/validate FilterIDs that are passed to Mercurial.
///
/// The textual form accepted by the `FromStr` impl is `{filter_path}:{hgid}`.
struct FilterId {
/// The part of the FilterID before the `:`, parsed as a repo path.
pub repo_path: RepoPathBuf,
/// The part of the FilterID after the `:`, parsed as an `HgId`.
pub hg_id: HgId,
/// The original, unparsed FilterID string; this is what `Display` prints.
src: String,
}
impl fmt::Display for FilterId {
    /// Prints the FilterID exactly as it was originally supplied.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.src)
    }
}
impl FromStr for FilterId {
    type Err = anyhow::Error;

    /// Parses a FilterID of the form `{filter_path}:{hgid}`.
    ///
    /// # Errors
    ///
    /// Fails when `s` does not consist of exactly two `:`-separated
    /// components, when the first component is not a valid repo path, or when
    /// the second component is not a valid `HgId`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let pieces: Vec<&str> = s.split(':').collect();
        let (raw_path, raw_hgid) = match pieces.as_slice() {
            &[path, hgid] => (path, hgid),
            _ => {
                return Err(anyhow!(
                    "Invalid filter id, must be in the form {{filter_path}}:{{hgid}}. Found: {}",
                    s
                ));
            }
        };

        let repo_path = RepoPathBuf::from_string(raw_path.to_string())
            .with_context(|| anyhow!("Invalid repo path found in FilterId: {:?}", raw_path))?;
        let hg_id = HgId::from_str(raw_hgid)
            .with_context(|| anyhow!("Invalid HgID found in FilterId: {:?}", raw_hgid))?;

        Ok(FilterId {
            repo_path,
            hg_id,
            src: s.to_string(),
        })
    }
}
/// Wrapper around the sparse crate's `Root` that can cross the CXX bridge.
///
/// CXX only allows exposing structures that are defined in the bridge crate.
/// Therefore, SparseProfileRoot simply serves as a wrapper around the actual Root object that's
/// passed to C++ and back to Rust.
pub struct SparseProfileRoot {
/// The wrapped sparse profile root that answers exclusion queries.
root: Root,
}
impl SparseProfileRoot {
// Returns true if the profile excludes the given path.
fn is_path_excluded(self: &SparseProfileRoot, path: &str) -> bool {
self.root.is_path_excluded(path)
}
}
// It's safe to move RootPromises between threads
//
// SAFETY: NOTE(review): RootPromise is an opaque C++ type wrapping a
// folly::Promise. These impls assert that it may be sent to, and shared with,
// the thread that runs the spawned task which completes the promise. This
// presumably relies on the promise being completed exactly once, by a single
// task; confirm against the C++ callers.
unsafe impl Send for RootPromise {}
unsafe impl Sync for RootPromise {}
// CXX bridge between this crate and the C++ code declared in ffi.h. The
// `extern "C++"` section lists the C++ items Rust calls; the `extern "Rust"`
// section lists the Rust items exposed to C++.
#[cxx::bridge]
mod ffi {
unsafe extern "C++" {
include!("eden/scm/lib/edenfs-ffi/src/ffi.h");
// Opaque C++ type holding the promise that receives the result.
#[namespace = "facebook::eden"]
type RootPromise;
// Completes the promise with a successfully created SparseProfileRoot.
#[namespace = "facebook::eden"]
fn set_root_promise_result(promise: SharedPtr<RootPromise>, value: Box<SparseProfileRoot>);
// Completes the promise with an error message.
#[namespace = "facebook::eden"]
fn set_root_promise_error(promise: SharedPtr<RootPromise>, error: String);
}
#[namespace = "facebook::eden"]
extern "Rust" {
// Opaque Rust type handed to C++ inside a Box.
type SparseProfileRoot;
// Takes a filter_id that corresponds to a filter file that's checked
// into the repo. The resulting profile (or an error) is delivered
// through `promise` rather than through the return value.
//
// Note: The corresponding call in C++ will throw if the Rust function
// returns an error result.
fn profile_from_filter_id(
id: &str,
checkout_path: &str,
promise: SharedPtr<RootPromise>,
) -> Result<()>;
// Returns true if the profile excludes the given path.
fn is_path_excluded(self: &SparseProfileRoot, path: &str) -> bool;
}
}
// As mentioned below, we return the SparseProfileRoot via a promise to circumvent some async
// limitations in CXX. This function wraps the bulk of the Sparse logic and provides a single
// place for returning result/error info via the RootPromise.
//
// `id` identifies the filter file to load, `abs_repo_path` is the checkout whose Repo object is
// used to read it, and `promise` is completed exactly once: with the SparseProfileRoot on
// success, or with an error message on failure.
async fn profile_contents_from_repo(
    id: FilterId,
    abs_repo_path: PathBuf,
    promise: SharedPtr<RootPromise>,
) {
    match _profile_contents_from_repo(id, abs_repo_path).await {
        Ok(root) => set_root_promise_result(promise, root),
        Err(e) => {
            // `{:#}` prints the whole anyhow context chain ("outer: inner: root cause").
            // Plain `{}` would only print the outermost context and hide the underlying
            // failure from the C++ caller.
            set_root_promise_error(promise, format!("Failed to get filter: {:#}", e));
        }
    }
}
// Fetches the content of a filter file and turns it into a SparseProfileRoot.
//
// `id` names the filter file (repo path) and the commit (hg id) to read it from;
// `abs_repo_path` is the checkout used to load (or look up the cached) Repo object.
//
// Errors are returned when the Repo or its stores cannot be obtained, when the commit's root
// tree cannot be resolved, when the filter path is missing or is a directory, when the file
// contents cannot be read, or when the contents cannot be parsed as a sparse profile.
async fn _profile_contents_from_repo(
    id: FilterId,
    abs_repo_path: PathBuf,
) -> Result<Box<SparseProfileRoot>, anyhow::Error> {
    // The lock is held for the whole function because `repo` borrows from the map.
    let mut repo_hash = REPO_HASHMAP.lock().await;

    // Load the repo on first use and cache it for later calls (single map lookup).
    let repo = match repo_hash.entry(abs_repo_path) {
        std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
        std::collections::hash_map::Entry::Vacant(slot) => {
            let loaded = Repo::load(slot.key(), &[], &[]).with_context(|| {
                anyhow!("failed to load Repo object for {}", slot.key().display())
            })?;
            slot.insert(loaded)
        }
    };

    let tree_store = repo
        .tree_store()
        .context("failed to get TreeStore from Repo object")?;
    let repo_store = repo
        .file_store()
        .context("failed to get FileStore from Repo object")?;

    // Create the tree manifest for the root tree of the repo
    let manifest_id = repo
        .get_root_tree_id(id.hg_id)
        .await
        .with_context(|| anyhow!("Failed to get root tree id for commit {:?}", &id.hg_id))?;
    let tree_manifest = TreeManifest::durable(tree_store, manifest_id);

    // Get the metadata of the filter file and verify it's a valid file.
    // The lookup runs on the blocking pool; the first `?` handles the task result and the
    // second handles the manifest lookup result.
    let p = id.repo_path.clone();
    let metadata = spawn_blocking(move || tree_manifest.get(&p)).await??;
    let file_id = match metadata {
        None => {
            return Err(anyhow!("{:?} is not a valid filter file", id.repo_path));
        }
        Some(FsNodeMetadata::File(FileMetadata { hgid, .. })) => hgid,
        Some(FsNodeMetadata::Directory(_)) => {
            return Err(anyhow!(
                "{:?} is a directory, not a valid filter file",
                id.repo_path
            ));
        }
    };

    // TODO(cuev): Is there a better way to do this?
    let mut stream = repo_store
        .read_file_contents(vec![Key::new(id.repo_path.clone(), file_id)])
        .await;
    match stream.next().await {
        Some(Ok((bytes, _key))) => {
            // A malformed filter file must surface as an error on the promise rather than
            // panic inside the spawned task (which would leave the promise unfulfilled).
            let root = Root::from_bytes(bytes.into_vec(), id.repo_path.to_string())
                .with_context(|| anyhow!("failed to parse filter file {}", &id.repo_path))?;
            Ok(Box::new(SparseProfileRoot { root }))
        }
        Some(Err(err)) => Err(err),
        None => Err(anyhow!("no contents for filter file {}", &id.repo_path)),
    }
}
// CXX doesn't allow async functions to be exposed to C++. This function wraps the bulk of the
// Sparse Profile creation logic. We spawn a task to complete the async work, and then return the
// value to C++ via a promise.
pub fn profile_from_filter_id(
id: &str,
checkout_path: &str,
promise: SharedPtr<RootPromise>,
) -> Result<(), anyhow::Error> {
// Parse the FilterID
let filter_id = FilterId::from_str(id)?;
// TODO(cuev): Is this even worth doing?
// We need to verify the checkout exists. The passed in checkout_path
// should correspond to a valid hg/sl repo that Mercurial is aware of.
let abs_repo_path = PathBuf::from(checkout_path);
if identity::sniff_dir(&abs_repo_path).is_err() {
return Err(anyhow!(
"{} is not a valid hg repo",
abs_repo_path.display()
));
}
// If we've already loaded a filter from this repo before, we can skip Repo
// object creation. Otherwise, we need to pay the 1 time cost of creating
// the Repo object.
spawn(profile_contents_from_repo(
filter_id,
abs_repo_path,
promise,
));
Ok(())
}

View File

@ -700,7 +700,7 @@ impl Repo {
)?)
}
async fn get_root_tree_id(&mut self, commit_id: HgId) -> Result<HgId> {
pub async fn get_root_tree_id(&mut self, commit_id: HgId) -> Result<HgId> {
let commit_store = self.dag_commits()?.read().to_dyn_read_root_tree_ids();
let tree_ids = commit_store.read_root_tree_ids(vec![commit_id]).await?;
Ok(tree_ids[0].1)

View File

@ -14,6 +14,7 @@ use std::io;
use std::io::BufRead;
use std::io::BufReader;
use futures::executor;
use futures::future::BoxFuture;
use futures::future::FutureExt;
use futures::Future;
@ -52,13 +53,13 @@ pub struct Root {
}
#[derive(Debug, Clone, PartialEq)]
enum Pattern {
pub enum Pattern {
Include(String),
Exclude(String),
}
#[derive(Debug)]
enum ProfileEntry {
pub enum ProfileEntry {
// Pattern plus additional source for this rule (e.g. "hgrc.dynamic").
Pattern(Pattern, Option<String>),
Profile(String),
@ -231,6 +232,19 @@ impl Root {
Ok(Matcher::new(matchers, rule_origins))
}
/// Returns true if the profile excludes the given path.
///
/// A matcher is built for this profile with a fetch callback that yields empty
/// content for every requested profile, and `path` is then tested against it.
/// If the match itself fails, the path is reported as not excluded.
///
/// # Panics
///
/// Panics if the matcher cannot be built or if `path` is not a valid repo path.
pub fn is_path_excluded(self: &Root, path: &str) -> bool {
    // TODO(cuev): Add a warning when sparse profiles contain a %include.
    // Filters don't support that.
    let matcher = executor::block_on(self.matcher(|_| async move { Ok(Some(vec![])) })).unwrap();
    let repo_path = RepoPath::from_str(path).unwrap();
    match matcher.matches(repo_path) {
        Ok(included) => !included,
        Err(_) => false,
    }
}
}
impl Profile {