diff --git a/eden/fs/inodes/test/JournalUpdateTest.cpp b/eden/fs/inodes/test/JournalUpdateTest.cpp
index 2777eb44ff..4f8af2d255 100644
--- a/eden/fs/inodes/test/JournalUpdateTest.cpp
+++ b/eden/fs/inodes/test/JournalUpdateTest.cpp
@@ -39,20 +39,20 @@ TEST_F(JournalUpdateTest, moveFileRename) {
   mount_.addFile("new_file.txt", "");
   mount_.move("new_file.txt", "new_file2.txt");
 
-  auto mergedDelta = journal.getLatest()->merge(testStart);
+  auto summedDelta = journal.accumulateRange(testStart);
 
   auto oldPath = RelativePath{"new_file.txt"};
   auto newPath = RelativePath{"new_file2.txt"};
 
-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(oldPath));
-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(newPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(oldPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(newPath));
 
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[oldPath].existedBefore);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[oldPath].existedAfter);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[newPath].existedBefore);
-  EXPECT_TRUE(mergedDelta->changedFilesInOverlay[newPath].existedAfter);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[oldPath].existedBefore);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[oldPath].existedAfter);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[newPath].existedBefore);
+  EXPECT_TRUE(summedDelta->changedFilesInOverlay[newPath].existedAfter);
 
-  EXPECT_EQ(mergedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
+  EXPECT_EQ(summedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
 }
 
 TEST_F(JournalUpdateTest, moveFileReplace) {
@@ -63,18 +63,18 @@ TEST_F(JournalUpdateTest, moveFileReplace) {
   mount_.move("new_file.txt", "existing_file.txt");
   mount_.deleteFile("existing_file.txt");
 
-  auto mergedDelta = journal.getLatest()->merge(testStart);
+  auto summedDelta = journal.accumulateRange(testStart);
 
   auto oldPath = RelativePath{"existing_file.txt"};
   auto newPath = RelativePath{"new_file.txt"};
 
-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(oldPath));
-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(newPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(oldPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(newPath));
 
-  EXPECT_TRUE(mergedDelta->changedFilesInOverlay[oldPath].existedBefore);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[oldPath].existedAfter);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[newPath].existedBefore);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[newPath].existedAfter);
+  EXPECT_TRUE(summedDelta->changedFilesInOverlay[oldPath].existedBefore);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[oldPath].existedAfter);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[newPath].existedBefore);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[newPath].existedAfter);
 
-  EXPECT_EQ(mergedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
+  EXPECT_EQ(summedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
 }
diff --git a/eden/fs/journal/Journal.cpp b/eden/fs/journal/Journal.cpp
index 243f3d6a8f..f883617ebc 100644
--- a/eden/fs/journal/Journal.cpp
+++ b/eden/fs/journal/Journal.cpp
@@ -8,6 +8,7 @@
  *
  */
 #include "Journal.h"
+#include <folly/logging/xlog.h>
 
 namespace facebook {
 namespace eden {
@@ -150,5 +151,73 @@ bool Journal::isSubscriberValid(uint64_t id) const {
 std::optional<JournalStats> Journal::getStats() {
   return deltaState_.rlock()->stats;
 }
+
+namespace {
+folly::StringPiece eventCharacterizationFor(const PathChangeInfo& ci) {
+  if (ci.existedBefore && !ci.existedAfter) {
+    return "Removed";
+  } else if (!ci.existedBefore && ci.existedAfter) {
+    return "Created";
+  } else if (ci.existedBefore && ci.existedAfter) {
+    return "Changed";
+  } else {
+    return "Ghost";
+  }
+}
+} // namespace
+
+std::unique_ptr<JournalDeltaRange> Journal::accumulateRange(
+    SequenceNumber limitSequence) const {
+  auto result = std::make_unique<JournalDeltaRange>();
+  {
+    auto deltaState = deltaState_.rlock();
+    if (deltaState->latest->toSequence < limitSequence) {
+      return nullptr;
+    }
+
+    const JournalDelta* current = deltaState->latest.get();
+
+    result->toSequence = current->toSequence;
+    result->toTime = current->toTime;
+    result->fromHash = current->fromHash;
+    result->toHash = current->toHash;
+
+    while (current) {
+      if (current->toSequence < limitSequence) {
+        break;
+      }
+
+      // Capture the lower bound.
+      result->fromSequence = current->fromSequence;
+      result->fromTime = current->fromTime;
+      result->fromHash = current->fromHash;
+
+      // Merge the unclean status list
+      result->uncleanPaths.insert(
+          current->uncleanPaths.begin(), current->uncleanPaths.end());
+
+      for (auto& entry : current->changedFilesInOverlay) {
+        auto& name = entry.first;
+        auto& currentInfo = entry.second;
+        auto* resultInfo = folly::get_ptr(result->changedFilesInOverlay, name);
+        if (!resultInfo) {
+          result->changedFilesInOverlay.emplace(name, currentInfo);
+        } else {
+          if (resultInfo->existedBefore != currentInfo.existedAfter) {
+            auto event1 = eventCharacterizationFor(currentInfo);
+            auto event2 = eventCharacterizationFor(*resultInfo);
+            XLOG(ERR) << "Journal for " << name << " holds invalid " << event1
+                      << ", " << event2 << " sequence";
+          }
+
+          resultInfo->existedBefore = currentInfo.existedBefore;
+        }
+      }
+
+      current = current->previous.get();
+    }
+  }
+  return result;
+}
 } // namespace eden
 } // namespace facebook
diff --git a/eden/fs/journal/Journal.h b/eden/fs/journal/Journal.h
index 1610d2ee8d..ac11fdc75c 100644
--- a/eden/fs/journal/Journal.h
+++ b/eden/fs/journal/Journal.h
@@ -116,6 +116,16 @@ class Journal {
    * that contains valid JournalStats if the Journal is non-empty*/
   std::optional<JournalStats> getStats();
 
+  /** Gets the sum of the modifications done by the deltas with Sequence
+   * Numbers >= limitSequence; if the limitSequence is further back than the
+   * Journal remembers, isTruncated will be set on the JournalDeltaRange.
+   * The default limit value is 0 which is never assigned by the Journal
+   * and thus indicates that all deltas should be summed.
+   * If the limitSequence means that no deltas will match, returns nullptr.
+   * */
+  std::unique_ptr<JournalDeltaRange> accumulateRange(
+      SequenceNumber limitSequence = 0) const;
+
  private:
   /** Add a delta to the journal.
    * The delta will have a new sequence number and timestamp
diff --git a/eden/fs/journal/JournalDelta.cpp b/eden/fs/journal/JournalDelta.cpp
index 16d48984a7..8b048a49ec 100644
--- a/eden/fs/journal/JournalDelta.cpp
+++ b/eden/fs/journal/JournalDelta.cpp
@@ -13,20 +13,6 @@
 namespace facebook {
 namespace eden {
 
-namespace {
-folly::StringPiece eventCharacterizationFor(const PathChangeInfo& ci) {
-  if (ci.existedBefore && !ci.existedAfter) {
-    return "Removed";
-  } else if (!ci.existedBefore && ci.existedAfter) {
-    return "Created";
-  } else if (ci.existedBefore && ci.existedAfter) {
-    return "Changed";
-  } else {
-    return "Ghost";
-  }
-}
-} // namespace
-
 JournalDelta::JournalDelta(RelativePathPiece fileName, JournalDelta::Created)
     : changedFilesInOverlay{{fileName.copy(), PathChangeInfo{false, true}}} {}
 
@@ -61,91 +47,13 @@ JournalDelta::~JournalDelta() {
   }
 }
 
-std::unique_ptr<JournalDelta> JournalDelta::merge(
-    SequenceNumber limitSequence,
-    bool pruneAfterLimit) const {
-  if (toSequence < limitSequence) {
-    return nullptr;
-  }
-
-  const JournalDelta* current = this;
-
-  auto result = std::make_unique<JournalDelta>();
-
-  result->toSequence = current->toSequence;
-  result->toTime = current->toTime;
-  result->fromHash = fromHash;
-  result->toHash = toHash;
-
-  while (current) {
-    if (current->toSequence < limitSequence) {
-      break;
-    }
-
-    // Capture the lower bound.
-    result->fromSequence = current->fromSequence;
-    result->fromTime = current->fromTime;
-    result->fromHash = current->fromHash;
-
-    // Merge the unclean status list
-    result->uncleanPaths.insert(
-        current->uncleanPaths.begin(), current->uncleanPaths.end());
-
-    for (auto& entry : current->changedFilesInOverlay) {
-      auto& name = entry.first;
-      auto& currentInfo = entry.second;
-      auto* resultInfo = folly::get_ptr(result->changedFilesInOverlay, name);
-      if (!resultInfo) {
-        result->changedFilesInOverlay.emplace(name, currentInfo);
-      } else {
-        if (resultInfo->existedBefore != currentInfo.existedAfter) {
-          auto event1 = eventCharacterizationFor(currentInfo);
-          auto event2 = eventCharacterizationFor(*resultInfo);
-          XLOG(ERR) << "Journal for " << name << " holds invalid " << event1
-                    << ", " << event2 << " sequence";
-        }
-
-        resultInfo->existedBefore = currentInfo.existedBefore;
-      }
-    }
-
-    // Continue the chain, but not if the caller requested that
-    // we prune it out.
-    if (!pruneAfterLimit) {
-      result->previous = current->previous;
-    }
-
-    current = current->previous.get();
-  }
-
-  return result;
-}
-
 size_t JournalDelta::estimateMemoryUsage() const {
   size_t mem = folly::goodMallocSize(sizeof(JournalDelta));
 
-  /* NOTE: The following code assumes an unordered_map (and similarly an
-   * unordered_set) is separated into an array of buckets, each one being
-   * a chain of nodes containing a next pointer, a key-value pair, and a stored
-   * hash
+  /* NOTE: The following code assumes an unordered_set is separated into an
+   * array of buckets, each one being a chain of nodes containing a next
+   * pointer, a key-value pair, and a stored hash
    */
 
   // Calculate Memory For Nodes in Each Bucket (Pointer to element and next)
-  size_t map_elem_size = folly::goodMallocSize(
-      sizeof(void*) + sizeof(decltype(changedFilesInOverlay)::value_type) +
-      sizeof(size_t));
-  for (unsigned long i = 0; i < changedFilesInOverlay.bucket_count(); ++i) {
-    mem += map_elem_size * changedFilesInOverlay.bucket_size(i);
-  }
-
-  // Calculate Memory Usage of Bucket List
-  mem += folly::goodMallocSize(
-      sizeof(void*) * changedFilesInOverlay.bucket_count());
-
-  // Calculate Memory Usage used indirectly by paths used as keys
-  for (auto& [path, change_info] : changedFilesInOverlay) {
-    mem += estimateIndirectMemoryUsage(path);
-  }
-
-  // Calculate Memory For Nodes in Each Bucket (similar to above)
   size_t set_elem_size = folly::goodMallocSize(
       sizeof(void*) + sizeof(decltype(uncleanPaths)::value_type) +
       sizeof(size_t));
diff --git a/eden/fs/journal/JournalDelta.h b/eden/fs/journal/JournalDelta.h
index c37425642d..8f6c84216b 100644
--- a/eden/fs/journal/JournalDelta.h
+++ b/eden/fs/journal/JournalDelta.h
@@ -100,19 +100,6 @@ class JournalDelta {
    * some other operation that changes the snapshot hash */
   std::unordered_set<RelativePath> uncleanPaths;
 
-  /** Merge the deltas running back from this delta for all deltas
-   * whose toSequence is >= limitSequence.
-   * The default limit value is 0 which is never assigned by the Journal
-   * and thus indicates that all deltas should be merged.
-   * if pruneAfterLimit is true and we stop due to hitting limitSequence,
-   * then the returned delta will have previous=nullptr rather than
-   * maintaining the chain.
-   * If the limitSequence means that no deltas will match, returns nullptr.
-   * */
-  std::unique_ptr<JournalDelta> merge(
-      SequenceNumber limitSequence = 0,
-      bool pruneAfterLimit = false) const;
-
   /** Get memory used (in bytes) by this Delta */
   size_t estimateMemoryUsage() const;
 
@@ -127,5 +114,36 @@ class JournalDelta {
   friend class JournalDeltaPtr;
 };
 
+struct JournalDeltaRange {
+  /** The current sequence range.
+   * This is a range to accommodate merging a range into a single entry. */
+  JournalDelta::SequenceNumber fromSequence;
+  JournalDelta::SequenceNumber toSequence;
+  /** The time at which the change was recorded.
+   * This is a range to accommodate merging a range into a single entry. */
+  std::chrono::steady_clock::time_point fromTime;
+  std::chrono::steady_clock::time_point toTime;
+
+  /** The snapshot hash that we started and ended up on.
+   * This will often be the same unless we perform a checkout or make
+   * a new snapshot from the snapshotable files in the overlay. */
+  Hash fromHash;
+  Hash toHash;
+
+  /**
+   * The set of files that changed in the overlay in this update, including
+   * some information about the changes.
+   */
+  std::unordered_map<RelativePath, PathChangeInfo> changedFilesInOverlay;
+  /** The set of files that had differing status across a checkout or
+   * some other operation that changes the snapshot hash */
+  std::unordered_set<RelativePath> uncleanPaths;
+
+  bool isTruncated = false;
+  JournalDeltaRange() = default;
+  JournalDeltaRange(JournalDeltaRange&&) = default;
+  JournalDeltaRange& operator=(JournalDeltaRange&&) = default;
+};
+
 } // namespace eden
 } // namespace facebook
diff --git a/eden/fs/journal/test/JournalTest.cpp b/eden/fs/journal/test/JournalTest.cpp
index 7fcc4dbefa..999dcd0018 100644
--- a/eden/fs/journal/test/JournalTest.cpp
+++ b/eden/fs/journal/test/JournalTest.cpp
@@ -14,7 +14,7 @@
 using namespace facebook::eden;
 using ::testing::UnorderedElementsAre;
 
-TEST(Journal, merges_chained_deltas) {
+TEST(Journal, accumulate_range_all_changes) {
   Journal journal;
 
   // Make an initial entry.
@@ -35,42 +35,29 @@
   EXPECT_EQ(2, latest->fromSequence);
   EXPECT_EQ(1, latest->previous->toSequence);
 
-  // Check basic merge implementation.
-  auto merged = latest->merge();
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(2, merged->changedFilesInOverlay.size());
-  EXPECT_EQ(nullptr, merged->previous);
-
-  // Let's try with some limits.
+  // Check basic sum implementation.
+  auto summed = journal.accumulateRange();
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(2, summed->toSequence);
+  EXPECT_EQ(2, summed->changedFilesInOverlay.size());
 
   // First just report the most recent item.
-  merged = latest->merge(2);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(2, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  EXPECT_NE(nullptr, merged->previous);
-
-  // Prune off sequence==1
-  merged = latest->merge(2, true);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(2, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  EXPECT_EQ(nullptr, merged->previous);
+  summed = journal.accumulateRange(2);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(2, summed->fromSequence);
+  EXPECT_EQ(2, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
 
   // Merge the first two entries.
-  merged = latest->merge(1);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(2, merged->changedFilesInOverlay.size());
-  EXPECT_EQ(nullptr, merged->previous);
+  summed = journal.accumulateRange(1);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(2, summed->toSequence);
+  EXPECT_EQ(2, summed->changedFilesInOverlay.size());
 }
 
-TEST(Journal, mergeRemoveCreateUpdate) {
+TEST(Journal, accumulateRangeRemoveCreateUpdate) {
   Journal journal;
 
   // Remove test.txt
@@ -85,59 +72,59 @@
   EXPECT_EQ(3, latest->toSequence);
   EXPECT_EQ(3, latest->fromSequence);
 
-  // The merged data should report test.txt as changed
-  auto merged = latest->merge();
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  // The summed data should report test.txt as changed
+  auto summed = journal.accumulateRange();
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
 
   // Test merging only partway back
-  merged = latest->merge(3);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(3, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  summed = journal.accumulateRange(3);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(3, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
 
-  merged = latest->merge(2);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(2, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  summed = journal.accumulateRange(2);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(2, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       false,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
 
-  merged = latest->merge(1);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  summed = journal.accumulateRange(1);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
 }
 
 TEST(Journal, destruction_does_not_overflow_stack_on_long_chain) {
diff --git a/eden/fs/service/EdenServiceHandler.cpp b/eden/fs/service/EdenServiceHandler.cpp
index ef41970fb3..a3e6671ef0 100644
--- a/eden/fs/service/EdenServiceHandler.cpp
+++ b/eden/fs/service/EdenServiceHandler.cpp
@@ -558,7 +558,6 @@ void EdenServiceHandler::getFilesChangedSince(
 #ifndef _WIN32
   auto helper = INSTRUMENT_THRIFT_CALL(DBG3, *mountPoint);
   auto edenMount = server_->getMount(*mountPoint);
-  auto delta = edenMount->getJournal().getLatest();
 
   if (fromPosition->mountGeneration !=
       static_cast<ssize_t>(edenMount->getMountGeneration())) {
@@ -569,22 +568,29 @@ void EdenServiceHandler::getFilesChangedSince(
         "You need to compute a new basis for delta queries.");
   }
 
-  out.toPosition.sequenceNumber = delta->toSequence;
-  out.toPosition.snapshotHash = thriftHash(delta->toHash);
+  // The +1 is because the core merge stops at the item prior to
+  // its limitSequence parameter and we want the changes *since*
+  // the provided sequence number.
+  auto summed =
+      edenMount->getJournal().accumulateRange(fromPosition->sequenceNumber + 1);
+
+  // We set the default toPosition to be where we were if summed is null
+  out.toPosition.sequenceNumber = fromPosition->sequenceNumber;
+  out.toPosition.snapshotHash = fromPosition->snapshotHash;
   out.toPosition.mountGeneration = edenMount->getMountGeneration();
 
   out.fromPosition = out.toPosition;
 
-  // The +1 is because the core merge stops at the item prior to
-  // its limitSequence parameter and we want the changes *since*
-  // the provided sequence number.
-  auto merged = delta->merge(fromPosition->sequenceNumber + 1, true);
-  if (merged) {
-    out.fromPosition.sequenceNumber = merged->fromSequence;
-    out.fromPosition.snapshotHash = thriftHash(merged->fromHash);
+  if (summed) {
+    out.toPosition.sequenceNumber = summed->toSequence;
+    out.toPosition.snapshotHash = thriftHash(summed->toHash);
+    out.toPosition.mountGeneration = edenMount->getMountGeneration();
+
+    out.fromPosition.sequenceNumber = summed->fromSequence;
+    out.fromPosition.snapshotHash = thriftHash(summed->fromHash);
     out.fromPosition.mountGeneration = out.toPosition.mountGeneration;
 
-    for (const auto& entry : merged->changedFilesInOverlay) {
+    for (const auto& entry : summed->changedFilesInOverlay) {
       auto& path = entry.first;
       auto& changeInfo = entry.second;
 
       if (changeInfo.isNew()) {
@@ -594,7 +600,7 @@ void EdenServiceHandler::getFilesChangedSince(
       }
     }
 
-    for (auto& path : merged->uncleanPaths) {
+    for (auto& path : summed->uncleanPaths) {
       out.uncleanPaths.emplace_back(path.stringPiece().str());
     }
   }