augment JournalDelta with unclean paths on snapshot hash change

Summary:
We were previously generating a simple JournalDelta consisting of
just the from/to snapshot hashes.  This is great from a `!O(repo)` perspective
when recording what changed but makes it difficult for clients downstream
to reason about changes that are not tracked in source control.

This diff adds a concept of `uncleanPaths` to the journal; these are paths
that we think are/were different from the hashes in the journal entry.

Since JournalDelta needs to be able to be merged I've opted for a simple
list of the paths that have a differing status; I'm not including all of
the various dirstate states for this because it is not obvious how to
reconcile the state across successive snapshot change events.

The `uncleanPaths` set is populated with an initial set of different paths as
the first part of the checkout call (prior to changing the hash), and then is
updated after the hash has changed to capture any additional differences.

Care needs to be taken to avoid recursively attempting to grab the parents lock
so I'm replicating just a little bit of the state management glue in the
`performDiff` method.

The Journal was not setting the from/to snapshot hashes when merging deltas.
This manifested in the watchman integration tests; we'd see the null revision
as the `from` and the `to` revision held the `from` revision(!).

On the watchman side we need to ask source control to expand the list of
files that changed when the from/to hashes are different; I've added code
to handle this.  This doesn't do anything smart in the case that the
source control aware queries are in use.  We'll look at that in a following
diff as it isn't strictly eden specific.

`watchman clock` was returning a basically empty clock unconditionally,
which meant that most since queries would report everything since the start
of time.  This is most likely contributing to poor Buck performance, although
I have not investigated the performance aspect of this.  It manifested itself
in the watchman integration tests.

Reviewed By: simpkins

Differential Revision: D5896494

fbshipit-source-id: a88be6448862781a1d8f5e15285ca07b4240593a
This commit is contained in:
Wez Furlong 2017-10-16 22:22:18 -07:00 committed by Facebook Github Bot
parent 73dc6620fc
commit 25a9786ca5
7 changed files with 194 additions and 120 deletions

View File

@ -27,8 +27,8 @@ class ObjectStore;
*/
class DiffContext {
public:
DiffContext(InodeDiffCallback* cb, bool listIgn, ObjectStore* os)
: callback{cb}, store{os}, listIgnored{listIgn} {
DiffContext(InodeDiffCallback* cb, bool listIgnored, ObjectStore* os)
: callback{cb}, store{os}, listIgnored{listIgnored} {
// TODO: Load the system-wide ignore settings and user-specific
// ignore settings into rootIgnore_.
}

View File

@ -28,6 +28,7 @@
#include "eden/fs/inodes/Dirstate.h"
#include "eden/fs/inodes/EdenDispatcher.h"
#include "eden/fs/inodes/FileInode.h"
#include "eden/fs/inodes/InodeDiffCallback.h"
#include "eden/fs/inodes/InodeError.h"
#include "eden/fs/inodes/InodeMap.h"
#include "eden/fs/inodes/Overlay.h"
@ -56,6 +57,81 @@ DEFINE_int32(fuseNumThreads, 16, "how many fuse dispatcher threads to spawn");
namespace facebook {
namespace eden {
namespace {
/** Helper for computing unclean paths when changing parents
*
* This InodeDiffCallback instance is used to compute the set
* of unclean files before and after actions that change the
* current commit hash of the mount point.
*/
class JournalDiffCallback : public InodeDiffCallback {
public:
explicit JournalDiffCallback()
: data_{folly::in_place, make_unique<JournalDelta>()} {}
void ignoredFile(RelativePathPiece) override {}
void untrackedFile(RelativePathPiece) override {}
void removedFile(
RelativePathPiece path,
const TreeEntry& /* sourceControlEntry */) override {
data_.wlock()->journalDelta->uncleanPaths.insert(path.copy());
}
void modifiedFile(
RelativePathPiece path,
const TreeEntry& /* sourceControlEntry */) override {
data_.wlock()->journalDelta->uncleanPaths.insert(path.copy());
}
void diffError(RelativePathPiece path, const folly::exception_wrapper& ew)
override {
// TODO: figure out what we should do to notify the user, if anything.
// perhaps we should just add this path to the list of unclean files?
XLOG(WARNING) << "error computing journal diff data for " << path << ": "
<< folly::exceptionStr(ew);
}
FOLLY_NODISCARD Future<folly::Unit> performDiff(
ObjectStore* objectStore,
TreeInodePtr rootInode,
std::shared_ptr<const Tree> rootTree) {
auto diffContext =
make_unique<DiffContext>(this, /* listIgnored = */ false, objectStore);
auto rawContext = diffContext.get();
return rootInode
->diff(
rawContext,
RelativePathPiece{},
std::move(rootTree),
diffContext->getToplevelIgnore(),
false)
.ensure([diffContext = std::move(diffContext)]() {});
}
/** moves the JournalDelta information out of this diff callback instance,
* rendering it invalid */
std::unique_ptr<JournalDelta> stealJournalDelta() {
std::unique_ptr<JournalDelta> result;
std::swap(result, data_.wlock()->journalDelta);
return result;
}
private:
struct Data {
explicit Data(unique_ptr<JournalDelta>&& journalDelta)
: journalDelta(std::move(journalDelta)) {}
unique_ptr<JournalDelta> journalDelta;
};
folly::Synchronized<Data> data_;
};
} // namespace
static constexpr folly::StringPiece kEdenStracePrefix = "eden.strace.";
// We compute this when the process is initialized, but stash a copy
@ -371,22 +447,31 @@ folly::Future<std::vector<CheckoutConflict>> EdenMount::checkout(
auto fromTreeFuture = objectStore_->getTreeForCommit(oldParents.parent1());
auto toTreeFuture = objectStore_->getTreeForCommit(snapshotHash);
return folly::collect(fromTreeFuture, toTreeFuture)
.then(
[this, ctx](std::tuple<shared_ptr<const Tree>, shared_ptr<const Tree>>
treeResults) {
auto& fromTree = std::get<0>(treeResults);
auto& toTree = std::get<1>(treeResults);
auto journalDiffCallback = std::make_shared<JournalDiffCallback>();
ctx->start(this->acquireRenameLock());
return this->getRootInode()
->checkout(ctx.get(), fromTree, toTree)
.then([toTree]() mutable { return toTree; });
})
.then([this](std::shared_ptr<const Tree> toTree) {
return dirstate_->onSnapshotChanged(toTree.get());
return folly::collect(fromTreeFuture, toTreeFuture)
.then([this, ctx, journalDiffCallback](
std::tuple<shared_ptr<const Tree>, shared_ptr<const Tree>>
treeResults) {
auto& fromTree = std::get<0>(treeResults);
auto& toTree = std::get<1>(treeResults);
return journalDiffCallback
->performDiff(getObjectStore(), getRootInode(), fromTree)
.then([this, ctx, fromTree, toTree]() {
ctx->start(this->acquireRenameLock());
return this->getRootInode()
->checkout(ctx.get(), fromTree, toTree)
.then([toTree]() mutable { return toTree; });
});
})
.then([this, ctx, oldParents, snapshotHash]() {
.then([this](std::shared_ptr<const Tree> toTree) {
return dirstate_->onSnapshotChanged(toTree.get()).then([toTree] {
return toTree;
});
})
.then([this, ctx, oldParents, snapshotHash, journalDiffCallback](
std::shared_ptr<const Tree> toTree) {
// Save the new snapshot hash
XLOG(DBG1) << "updating snapshot for " << this->getPath() << " from "
<< oldParents << " to " << snapshotHash;
@ -394,17 +479,22 @@ folly::Future<std::vector<CheckoutConflict>> EdenMount::checkout(
auto conflicts = ctx->finish(snapshotHash);
// Write a journal entry
// TODO: We don't include any file changes for now. We'll need to
// figure out the desired data to pass to watchman. We intentionally
// don't want to give it the full list of files that logically
// changed--we intentionally don't process files that were changed but
// have never been accessed.
auto journalDelta = make_unique<JournalDelta>();
journalDelta->fromHash = oldParents.parent1();
journalDelta->toHash = snapshotHash;
journal_.wlock()->addDelta(std::move(journalDelta));
return journalDiffCallback
->performDiff(getObjectStore(), getRootInode(), std::move(toTree))
.then([this,
conflicts,
journalDiffCallback,
oldParents,
snapshotHash]() {
return conflicts;
auto journalDelta = journalDiffCallback->stealJournalDelta();
journalDelta->fromHash = oldParents.parent1();
journalDelta->toHash = snapshotHash;
journal_.wlock()->addDelta(std::move(journalDelta));
return conflicts;
});
});
}
@ -449,20 +539,16 @@ Future<Unit> EdenMount::resetParents(const ParentCommits& parents) {
.then([this](std::shared_ptr<const Tree> rootTree) {
return dirstate_->onSnapshotChanged(rootTree.get());
})
.then([
this,
parents,
oldParents,
parentsLock = std::move(parentsLock)
]() {
this->config_->setParentCommits(parents);
parentsLock->parents.setParents(parents);
.then(
[this, oldParents, parents, parentsLock = std::move(parentsLock)]() {
this->config_->setParentCommits(parents);
parentsLock->parents.setParents(parents);
auto journalDelta = make_unique<JournalDelta>();
journalDelta->fromHash = oldParents.parent1();
journalDelta->toHash = parents.parent1();
journal_.wlock()->addDelta(std::move(journalDelta));
});
auto journalDelta = make_unique<JournalDelta>();
journalDelta->fromHash = oldParents.parent1();
journalDelta->toHash = parents.parent1();
journal_.wlock()->addDelta(std::move(journalDelta));
});
}
struct timespec EdenMount::getLastCheckoutTime() {

View File

@ -169,7 +169,6 @@ class DiffTest {
DiffResults resetCommitAndDiff(
FakeTreeBuilder& builder,
bool readyImmediately,
bool loadInodes);
void checkNoChanges() {
@ -181,7 +180,7 @@ class DiffTest {
EXPECT_THAT(result.getModified(), UnorderedElementsAre());
}
void testResetFileModified(bool readyImmediately, bool loadInodes);
void testResetFileModified(bool loadInodes);
FakeTreeBuilder& getBuilder() {
return builder_;
@ -205,37 +204,15 @@ class DiffTest {
* updates the current commit ID.)
* - Calls EdenMount::diff(), waits for it to complete, and returns the
* results.
*
* If the readyImmediately parameter is true, all of the necessary source
* control objects will be marked ready in the object store before starting the
* diff operation. Therefore the diff operation should complete immediately.
* If readyImmediately is false, the diff operation will be started with the
* source control objects not ready yet. They will be made ready after the
* diff starts, to test diff behavior when future objects do not complete
* immediately. This lets us test both the cases where the entire diff()
* operation completes immediately (before diff() returns) and when it blocks
* and only gets completed later, after diff() returns.
*/
DiffResults DiffTest::resetCommitAndDiff(
FakeTreeBuilder& builder,
bool readyImmediately,
bool loadInodes) {
if (loadInodes) {
if (!readyImmediately) {
// loadAllInodes() cannot succeed if the underlying Trees and Blobs are
// not ready.
throw std::invalid_argument(
"readyImmediately=false and loadInodes=true "
"is an invalid combination");
}
mount_.loadAllInodes();
}
mount_.resetCommit(builder, readyImmediately);
mount_.resetCommit(builder, /* setReady = */ true);
auto df = diffFuture();
if (!readyImmediately) {
EXPECT_FALSE(df.isReady());
builder.setAllReady();
}
return EXPECT_FUTURE_RESULT(df);
}
@ -468,15 +445,14 @@ TEST(DiffTest, pathOrdering) {
* materialized, but are nonetheless different than the current commit.
*/
void testResetFileModified(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetFileModified(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
auto b2 = t.getBuilder().clone();
b2.replaceFile("src/1.txt", "This file has been updated.\n");
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(result.getUntracked(), UnorderedElementsAre());
EXPECT_THAT(result.getIgnored(), UnorderedElementsAre());
@ -486,20 +462,18 @@ void testResetFileModified(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetFileModified) {
testResetFileModified(true, true);
testResetFileModified(true, false);
testResetFileModified(false, false);
testResetFileModified(true);
testResetFileModified(false);
}
void testResetFileModeChanged(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetFileModeChanged(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
auto b2 = t.getBuilder().clone();
b2.replaceFile("src/1.txt", "This is src/1.txt.\n", 0755);
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(result.getUntracked(), UnorderedElementsAre());
EXPECT_THAT(result.getIgnored(), UnorderedElementsAre());
@ -509,14 +483,12 @@ void testResetFileModeChanged(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetFileModeChanged) {
testResetFileModeChanged(true, true);
testResetFileModeChanged(true, false);
testResetFileModeChanged(false, false);
testResetFileModeChanged(true);
testResetFileModeChanged(false);
}
void testResetFileRemoved(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetFileRemoved(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
// Create a commit with a new file added.
@ -525,7 +497,7 @@ void testResetFileRemoved(bool readyImmediately, bool loadInodes) {
auto b2 = t.getBuilder().clone();
b2.setFile("src/notpresent.txt", "never present in the working directory");
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(result.getUntracked(), UnorderedElementsAre());
EXPECT_THAT(result.getIgnored(), UnorderedElementsAre());
@ -536,14 +508,12 @@ void testResetFileRemoved(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetFileRemoved) {
testResetFileRemoved(true, true);
testResetFileRemoved(true, false);
testResetFileRemoved(false, false);
testResetFileRemoved(true);
testResetFileRemoved(false);
}
void testResetFileAdded(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetFileAdded(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
// Create a commit with a file removed.
@ -552,7 +522,7 @@ void testResetFileAdded(bool readyImmediately, bool loadInodes) {
auto b2 = t.getBuilder().clone();
b2.removeFile("src/1.txt");
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(
result.getUntracked(), UnorderedElementsAre(RelativePath{"src/1.txt"}));
@ -562,14 +532,12 @@ void testResetFileAdded(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetFileAdded) {
testResetFileAdded(true, true);
testResetFileAdded(true, false);
testResetFileAdded(false, false);
testResetFileAdded(true);
testResetFileAdded(false);
}
void testResetDirectoryRemoved(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetDirectoryRemoved(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
// Create a commit with a new directory added.
@ -582,7 +550,7 @@ void testResetDirectoryRemoved(bool readyImmediately, bool loadInodes) {
b2.setFile("src/extradir/sub/xyz.txt", "xyz");
b2.setFile("src/extradir/a/b/c/d/e.txt", "test");
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(result.getUntracked(), UnorderedElementsAre());
EXPECT_THAT(result.getIgnored(), UnorderedElementsAre());
@ -598,14 +566,12 @@ void testResetDirectoryRemoved(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetDirectoryRemoved) {
testResetDirectoryRemoved(true, true);
testResetDirectoryRemoved(true, false);
testResetDirectoryRemoved(false, false);
testResetDirectoryRemoved(true);
testResetDirectoryRemoved(false);
}
void testResetDirectoryAdded(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetDirectoryAdded(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
// Create a commit with a directory removed.
@ -615,7 +581,7 @@ void testResetDirectoryAdded(bool readyImmediately, bool loadInodes) {
b2.removeFile("src/a/b/3.txt");
b2.removeFile("src/a/b/c/4.txt");
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(
result.getUntracked(),
@ -627,14 +593,12 @@ void testResetDirectoryAdded(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetDirectoryAdded) {
testResetDirectoryAdded(true, true);
testResetDirectoryAdded(true, false);
testResetDirectoryAdded(false, false);
testResetDirectoryAdded(true);
testResetDirectoryAdded(false);
}
void testResetReplaceDirWithFile(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetReplaceDirWithFile(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
// Create a commit with 2.txt replaced by a directory added.
@ -648,7 +612,7 @@ void testResetReplaceDirWithFile(bool readyImmediately, bool loadInodes) {
b2.setFile("src/2.txt/sub/xyz.txt", "xyz");
b2.setFile("src/2.txt/a/b/c/d/e.txt", "test");
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(
result.getUntracked(), UnorderedElementsAre(RelativePath{"src/2.txt"}));
@ -665,14 +629,12 @@ void testResetReplaceDirWithFile(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetReplaceDirWithFile) {
testResetReplaceDirWithFile(true, true);
testResetReplaceDirWithFile(true, false);
testResetReplaceDirWithFile(false, false);
testResetReplaceDirWithFile(true);
testResetReplaceDirWithFile(false);
}
void testResetReplaceFileWithDir(bool readyImmediately, bool loadInodes) {
SCOPED_TRACE(folly::to<string>(
"readyImmediately=", readyImmediately, " loadInodes=", loadInodes));
void testResetReplaceFileWithDir(bool loadInodes) {
SCOPED_TRACE(folly::to<string>("loadInodes=", loadInodes));
DiffTest t;
// Create a commit with a directory removed and replaced with a file.
@ -683,7 +645,7 @@ void testResetReplaceFileWithDir(bool readyImmediately, bool loadInodes) {
b2.removeFile("src/a/b/c/4.txt");
b2.replaceFile("src/a", "a is now a file");
auto result = t.resetCommitAndDiff(b2, readyImmediately, loadInodes);
auto result = t.resetCommitAndDiff(b2, loadInodes);
EXPECT_THAT(result.getErrors(), UnorderedElementsAre());
EXPECT_THAT(
result.getUntracked(),
@ -695,9 +657,8 @@ void testResetReplaceFileWithDir(bool readyImmediately, bool loadInodes) {
}
TEST(DiffTest, resetReplaceFileWithDir) {
testResetReplaceFileWithDir(true, true);
testResetReplaceFileWithDir(true, false);
testResetReplaceFileWithDir(false, false);
testResetReplaceFileWithDir(true);
testResetReplaceFileWithDir(false);
}
// Test with a .gitignore file in the top-level directory

View File

@ -55,6 +55,8 @@ std::unique_ptr<JournalDelta> JournalDelta::merge(
result->toSequence = current->toSequence;
result->toTime = current->toTime;
result->fromHash = fromHash;
result->toHash = toHash;
while (current) {
if (current->toSequence < limitSequence) {
@ -64,6 +66,11 @@ std::unique_ptr<JournalDelta> JournalDelta::merge(
// Capture the lower bound.
result->fromSequence = current->fromSequence;
result->fromTime = current->fromTime;
result->fromHash = current->fromHash;
// Merge the unclean status list
result->uncleanPaths.insert(
current->uncleanPaths.begin(), current->uncleanPaths.end());
// process created files.
for (auto& created : current->createdFilesInOverlay) {

View File

@ -24,6 +24,10 @@ class JournalDelta {
enum Removed { REMOVED };
enum Renamed { RENAME };
JournalDelta() = default;
JournalDelta(JournalDelta&&) = default;
JournalDelta& operator=(JournalDelta&&) = default;
JournalDelta(const JournalDelta&) = delete;
JournalDelta& operator=(const JournalDelta&) = delete;
JournalDelta(std::initializer_list<RelativePath> overlayFileNames);
JournalDelta(RelativePathPiece fileName, Created);
JournalDelta(RelativePathPiece fileName, Removed);
@ -52,6 +56,9 @@ class JournalDelta {
std::unordered_set<RelativePath> createdFilesInOverlay;
/** The set of files that were removed in the overlay in this update */
std::unordered_set<RelativePath> removedFilesInOverlay;
/** The set of files that had differing status across a checkout or
* some other operation that changes the snapshot hash */
std::unordered_set<RelativePath> uncleanPaths;
/** Merge the deltas running back from this delta for all deltas
* whose toSequence is >= limitSequence.

View File

@ -280,6 +280,10 @@ void EdenServiceHandler::getFilesChangedSince(
for (auto& path : merged->removedFilesInOverlay) {
out.removedPaths.emplace_back(path.stringPiece().str());
}
for (auto& path : merged->uncleanPaths) {
out.uncleanPaths.emplace_back(path.stringPiece().str());
}
}
}

View File

@ -96,6 +96,15 @@ struct FileDelta {
3: list<string> changedPaths
4: list<string> createdPaths
5: list<string> removedPaths
/** When fromPosition.snapshotHash != toPosition.snapshotHash this holds
* the union of the set of files whose StatusCode differed from the
* committed fromPosition hash before the hash changed, and the set of
* files whose StatusCode differed from the committed toPosition hash
* after the hash was changed. This list of files represents files
* whose state may have changed as part of an update operation, but
* in ways that may not be able to be extracted solely by performing
* source control diff operations on the from/to hashes. */
6: list<string> uncleanPaths
}
enum StatusCode {