Changing Journal API from merge to accumulateRange

Summary: merge() was a function on a JournalDelta that created a new JournalDelta and optionally left it connected to the older deltas. accumulateRange() is a new function on the Journal itself (acting on the latest delta) that creates a JournalDeltaRange, which cannot be connected to older deltas.
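
A minimal before/after sketch of the call-site change (`journal` is a Journal and `limit` a SequenceNumber; the variable names are illustrative, not from this diff):

  // Old API: reach through the latest delta; the result could stay chained
  // to older deltas unless pruneAfterLimit was passed.
  auto merged = journal.getLatest()->merge(limit, /*pruneAfterLimit=*/true);

  // New API: ask the Journal directly; the result is a flat summary with no
  // link back into the journal's delta chain.
  auto summed = journal.accumulateRange(limit);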

Reviewed By: chadaustin

Differential Revision: D15881452

fbshipit-source-id: 573505c1171f78d46afc98f1db9b5b9ee2fff60f
Commit 6760da6baf (parent 26a6e91e05)
Jake Crouch, 2019-06-19 15:12:50 -07:00, committed by Facebook GitHub Bot
7 changed files with 198 additions and 200 deletions

@@ -39,20 +39,20 @@ TEST_F(JournalUpdateTest, moveFileRename) {
   mount_.addFile("new_file.txt", "");
   mount_.move("new_file.txt", "new_file2.txt");

-  auto mergedDelta = journal.getLatest()->merge(testStart);
+  auto summedDelta = journal.accumulateRange(testStart);

   auto oldPath = RelativePath{"new_file.txt"};
   auto newPath = RelativePath{"new_file2.txt"};

-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(oldPath));
-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(newPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(oldPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(newPath));
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[oldPath].existedBefore);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[oldPath].existedAfter);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[newPath].existedBefore);
-  EXPECT_TRUE(mergedDelta->changedFilesInOverlay[newPath].existedAfter);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[oldPath].existedBefore);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[oldPath].existedAfter);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[newPath].existedBefore);
+  EXPECT_TRUE(summedDelta->changedFilesInOverlay[newPath].existedAfter);
-  EXPECT_EQ(mergedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
+  EXPECT_EQ(summedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
 }

 TEST_F(JournalUpdateTest, moveFileReplace) {
@@ -63,18 +63,18 @@ TEST_F(JournalUpdateTest, moveFileReplace) {
   mount_.move("new_file.txt", "existing_file.txt");
   mount_.deleteFile("existing_file.txt");

-  auto mergedDelta = journal.getLatest()->merge(testStart);
+  auto summedDelta = journal.accumulateRange(testStart);

   auto oldPath = RelativePath{"existing_file.txt"};
   auto newPath = RelativePath{"new_file.txt"};

-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(oldPath));
-  ASSERT_EQ(1, mergedDelta->changedFilesInOverlay.count(newPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(oldPath));
+  ASSERT_EQ(1, summedDelta->changedFilesInOverlay.count(newPath));
-  EXPECT_TRUE(mergedDelta->changedFilesInOverlay[oldPath].existedBefore);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[oldPath].existedAfter);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[newPath].existedBefore);
-  EXPECT_FALSE(mergedDelta->changedFilesInOverlay[newPath].existedAfter);
+  EXPECT_TRUE(summedDelta->changedFilesInOverlay[oldPath].existedBefore);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[oldPath].existedAfter);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[newPath].existedBefore);
+  EXPECT_FALSE(summedDelta->changedFilesInOverlay[newPath].existedAfter);
-  EXPECT_EQ(mergedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
+  EXPECT_EQ(summedDelta->uncleanPaths, std::unordered_set<RelativePath>{});
 }

@@ -8,6 +8,7 @@
  *
  */
 #include "Journal.h"
+#include <folly/logging/xlog.h>

 namespace facebook {
 namespace eden {
@@ -150,5 +151,73 @@ bool Journal::isSubscriberValid(uint64_t id) const {
 std::optional<JournalStats> Journal::getStats() {
   return deltaState_.rlock()->stats;
 }

+namespace {
+folly::StringPiece eventCharacterizationFor(const PathChangeInfo& ci) {
+  if (ci.existedBefore && !ci.existedAfter) {
+    return "Removed";
+  } else if (!ci.existedBefore && ci.existedAfter) {
+    return "Created";
+  } else if (ci.existedBefore && ci.existedAfter) {
+    return "Changed";
+  } else {
+    return "Ghost";
+  }
+}
+} // namespace
+
+std::unique_ptr<JournalDeltaRange> Journal::accumulateRange(
+    SequenceNumber limitSequence) const {
+  auto result = std::make_unique<JournalDeltaRange>();
+  {
+    auto deltaState = deltaState_.rlock();
+    if (deltaState->latest->toSequence < limitSequence) {
+      return nullptr;
+    }
+    const JournalDelta* current = deltaState->latest.get();
+    result->toSequence = current->toSequence;
+    result->toTime = current->toTime;
+    result->fromHash = current->fromHash;
+    result->toHash = current->toHash;
+    while (current) {
+      if (current->toSequence < limitSequence) {
+        break;
+      }
+      // Capture the lower bound.
+      result->fromSequence = current->fromSequence;
+      result->fromTime = current->fromTime;
+      result->fromHash = current->fromHash;
+      // Merge the unclean status list
+      result->uncleanPaths.insert(
+          current->uncleanPaths.begin(), current->uncleanPaths.end());
+      for (auto& entry : current->changedFilesInOverlay) {
+        auto& name = entry.first;
+        auto& currentInfo = entry.second;
+        auto* resultInfo = folly::get_ptr(result->changedFilesInOverlay, name);
+        if (!resultInfo) {
+          result->changedFilesInOverlay.emplace(name, currentInfo);
+        } else {
+          if (resultInfo->existedBefore != currentInfo.existedAfter) {
+            auto event1 = eventCharacterizationFor(currentInfo);
+            auto event2 = eventCharacterizationFor(*resultInfo);
+            XLOG(ERR) << "Journal for " << name << " holds invalid " << event1
+                      << ", " << event2 << " sequence";
+          }
+          resultInfo->existedBefore = currentInfo.existedBefore;
+        }
+      }
+      current = current->previous.get();
+    }
+  }
+  return result;
+}

 } // namespace eden
 } // namespace facebook
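
The per-path accumulation above keeps the newest existedAfter and repeatedly adopts each older delta's existedBefore as it walks backwards through the chain. A self-contained sketch of that fold, using a stand-in PathChangeInfo rather than eden's real type:

  #include <iostream>

  // Stand-in for eden's PathChangeInfo, only to illustrate the fold above.
  struct PathChangeInfo {
    bool existedBefore;
    bool existedAfter;
  };

  // Walking newest -> oldest, the accumulated entry keeps its (newer)
  // existedAfter and adopts each older delta's existedBefore.
  PathChangeInfo accumulate(const PathChangeInfo& older, PathChangeInfo newer) {
    newer.existedBefore = older.existedBefore;
    return newer;
  }

  int main() {
    // Three deltas for one path: removed, then re-created, then modified.
    PathChangeInfo removed{true, false};
    PathChangeInfo created{false, true};
    PathChangeInfo changed{true, true};

    // accumulateRange() visits `changed` first, then folds in the older two.
    PathChangeInfo net = accumulate(removed, accumulate(created, changed));

    // Net effect: existedBefore=1, existedAfter=1 -> "Changed", matching the
    // RemoveCreateUpdate expectations in the tests below.
    std::cout << net.existedBefore << net.existedAfter << "\n";
  }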

@@ -116,6 +116,16 @@ class Journal {
    * that contains valid JournalStats if the Journal is non-empty*/
   std::optional<JournalStats> getStats();

+  /** Gets the sum of the modifications done by the deltas with sequence
+   * numbers >= limitSequence. If limitSequence is further back than the
+   * Journal remembers, isTruncated will be set on the returned
+   * JournalDeltaRange.
+   * The default limit value is 0, which is never assigned by the Journal
+   * and thus indicates that all deltas should be summed.
+   * If limitSequence means that no deltas will match, returns nullptr.
+   */
+  std::unique_ptr<JournalDeltaRange> accumulateRange(
+      SequenceNumber limitSequence = 0) const;
+
 private:
   /** Add a delta to the journal.
    * The delta will have a new sequence number and timestamp
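
A hedged sketch of the contract documented above (`journal` is a Journal; `next` stands for a sequence number newer than anything the journal holds):

  auto all = journal.accumulateRange();      // limitSequence = 0: sum everything
  auto none = journal.accumulateRange(next); // no delta has toSequence >= next
  // none == nullptr here, so callers must check before dereferencing.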

@@ -13,20 +13,6 @@
 namespace facebook {
 namespace eden {

-namespace {
-folly::StringPiece eventCharacterizationFor(const PathChangeInfo& ci) {
-  if (ci.existedBefore && !ci.existedAfter) {
-    return "Removed";
-  } else if (!ci.existedBefore && ci.existedAfter) {
-    return "Created";
-  } else if (ci.existedBefore && ci.existedAfter) {
-    return "Changed";
-  } else {
-    return "Ghost";
-  }
-}
-} // namespace

 JournalDelta::JournalDelta(RelativePathPiece fileName, JournalDelta::Created)
     : changedFilesInOverlay{{fileName.copy(), PathChangeInfo{false, true}}} {}
@@ -61,91 +47,13 @@ JournalDelta::~JournalDelta() {
   }
 }

-std::unique_ptr<JournalDelta> JournalDelta::merge(
-    SequenceNumber limitSequence,
-    bool pruneAfterLimit) const {
-  if (toSequence < limitSequence) {
-    return nullptr;
-  }
-  const JournalDelta* current = this;
-  auto result = std::make_unique<JournalDelta>();
-  result->toSequence = current->toSequence;
-  result->toTime = current->toTime;
-  result->fromHash = fromHash;
-  result->toHash = toHash;
-  while (current) {
-    if (current->toSequence < limitSequence) {
-      break;
-    }
-    // Capture the lower bound.
-    result->fromSequence = current->fromSequence;
-    result->fromTime = current->fromTime;
-    result->fromHash = current->fromHash;
-    // Merge the unclean status list
-    result->uncleanPaths.insert(
-        current->uncleanPaths.begin(), current->uncleanPaths.end());
-    for (auto& entry : current->changedFilesInOverlay) {
-      auto& name = entry.first;
-      auto& currentInfo = entry.second;
-      auto* resultInfo = folly::get_ptr(result->changedFilesInOverlay, name);
-      if (!resultInfo) {
-        result->changedFilesInOverlay.emplace(name, currentInfo);
-      } else {
-        if (resultInfo->existedBefore != currentInfo.existedAfter) {
-          auto event1 = eventCharacterizationFor(currentInfo);
-          auto event2 = eventCharacterizationFor(*resultInfo);
-          XLOG(ERR) << "Journal for " << name << " holds invalid " << event1
-                    << ", " << event2 << " sequence";
-        }
-        resultInfo->existedBefore = currentInfo.existedBefore;
-      }
-    }
-    // Continue the chain, but not if the caller requested that
-    // we prune it out.
-    if (!pruneAfterLimit) {
-      result->previous = current->previous;
-    }
-    current = current->previous.get();
-  }
-  return result;
-}

 size_t JournalDelta::estimateMemoryUsage() const {
   size_t mem = folly::goodMallocSize(sizeof(JournalDelta));
-  /* NOTE: The following code assumes an unordered_map (and similarly an
-   * unordered_set) is separated into an array of buckets, each one being
-   * a chain of nodes containing a next pointer, a key-value pair, and a stored
-   * hash
+  /* NOTE: The following code assumes an unordered_set is separated into an
+   * array of buckets, each one being a chain of nodes containing a next
+   * pointer, a key-value pair, and a stored hash
    */

   // Calculate Memory For Nodes in Each Bucket (Pointer to element and next)
   size_t map_elem_size = folly::goodMallocSize(
       sizeof(void*) + sizeof(decltype(changedFilesInOverlay)::value_type) +
       sizeof(size_t));
   for (unsigned long i = 0; i < changedFilesInOverlay.bucket_count(); ++i) {
     mem += map_elem_size * changedFilesInOverlay.bucket_size(i);
   }

   // Calculate Memory Usage of Bucket List
   mem += folly::goodMallocSize(
       sizeof(void*) * changedFilesInOverlay.bucket_count());

   // Calculate Memory Usage used indirectly by paths used as keys
   for (auto& [path, change_info] : changedFilesInOverlay) {
     mem += estimateIndirectMemoryUsage(path);
   }

   // Calculate Memory For Nodes in Each Bucket (similar to above)
   size_t set_elem_size = folly::goodMallocSize(
       sizeof(void*) + sizeof(decltype(uncleanPaths)::value_type) +
       sizeof(size_t));

@@ -100,19 +100,6 @@ class JournalDelta {
    * some other operation that changes the snapshot hash */
   std::unordered_set<RelativePath> uncleanPaths;

-  /** Merge the deltas running back from this delta for all deltas
-   * whose toSequence is >= limitSequence.
-   * The default limit value is 0 which is never assigned by the Journal
-   * and thus indicates that all deltas should be merged.
-   * if pruneAfterLimit is true and we stop due to hitting limitSequence,
-   * then the returned delta will have previous=nullptr rather than
-   * maintaining the chain.
-   * If the limitSequence means that no deltas will match, returns nullptr.
-   * */
-  std::unique_ptr<JournalDelta> merge(
-      SequenceNumber limitSequence = 0,
-      bool pruneAfterLimit = false) const;
-
   /** Get memory used (in bytes) by this Delta */
   size_t estimateMemoryUsage() const;
@@ -127,5 +114,36 @@ class JournalDelta {
   friend class JournalDeltaPtr;
 };

+struct JournalDeltaRange {
+  /** The current sequence range.
+   * This is a range to accommodate merging a range into a single entry. */
+  JournalDelta::SequenceNumber fromSequence;
+  JournalDelta::SequenceNumber toSequence;
+
+  /** The time at which the change was recorded.
+   * This is a range to accommodate merging a range into a single entry. */
+  std::chrono::steady_clock::time_point fromTime;
+  std::chrono::steady_clock::time_point toTime;
+
+  /** The snapshot hash that we started and ended up on.
+   * This will often be the same unless we perform a checkout or make
+   * a new snapshot from the snapshotable files in the overlay. */
+  Hash fromHash;
+  Hash toHash;
+
+  /**
+   * The set of files that changed in the overlay in this update, including
+   * some information about the changes.
+   */
+  std::unordered_map<RelativePath, PathChangeInfo> changedFilesInOverlay;
+
+  /** The set of files that had differing status across a checkout or
+   * some other operation that changes the snapshot hash */
+  std::unordered_set<RelativePath> uncleanPaths;
+
+  bool isTruncated = false;
+
+  JournalDeltaRange() = default;
+  JournalDeltaRange(JournalDeltaRange&&) = default;
+  JournalDeltaRange& operator=(JournalDeltaRange&&) = default;
+};

 } // namespace eden
 } // namespace facebook

@@ -14,7 +14,7 @@
 using namespace facebook::eden;
 using ::testing::UnorderedElementsAre;

-TEST(Journal, merges_chained_deltas) {
+TEST(Journal, accumulate_range_all_changes) {
   Journal journal;

   // Make an initial entry.
@@ -35,42 +35,29 @@
   EXPECT_EQ(2, latest->fromSequence);
   EXPECT_EQ(1, latest->previous->toSequence);

-  // Check basic merge implementation.
-  auto merged = latest->merge();
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(2, merged->changedFilesInOverlay.size());
-  EXPECT_EQ(nullptr, merged->previous);
-  // Let's try with some limits.
+  // Check basic sum implementation.
+  auto summed = journal.accumulateRange();
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(2, summed->toSequence);
+  EXPECT_EQ(2, summed->changedFilesInOverlay.size());

   // First just report the most recent item.
-  merged = latest->merge(2);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(2, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  EXPECT_NE(nullptr, merged->previous);
-  // Prune off sequence==1
-  merged = latest->merge(2, true);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(2, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  EXPECT_EQ(nullptr, merged->previous);
+  summed = journal.accumulateRange(2);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(2, summed->fromSequence);
+  EXPECT_EQ(2, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());

   // Merge the first two entries.
-  merged = latest->merge(1);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(2, merged->toSequence);
-  EXPECT_EQ(2, merged->changedFilesInOverlay.size());
-  EXPECT_EQ(nullptr, merged->previous);
+  summed = journal.accumulateRange(1);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(2, summed->toSequence);
+  EXPECT_EQ(2, summed->changedFilesInOverlay.size());
 }

-TEST(Journal, mergeRemoveCreateUpdate) {
+TEST(Journal, accumulateRangeRemoveCreateUpdate) {
   Journal journal;

   // Remove test.txt
@@ -85,59 +72,59 @@ TEST(Journal, mergeRemoveCreateUpdate) {
   EXPECT_EQ(3, latest->toSequence);
   EXPECT_EQ(3, latest->fromSequence);

-  // The merged data should report test.txt as changed
-  auto merged = latest->merge();
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  // The summed data should report test.txt as changed
+  auto summed = journal.accumulateRange();
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);

   // Test merging only partway back
-  merged = latest->merge(3);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(3, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  summed = journal.accumulateRange(3);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(3, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);

-  merged = latest->merge(2);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(2, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  summed = journal.accumulateRange(2);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(2, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       false,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);

-  merged = latest->merge(1);
-  ASSERT_NE(nullptr, merged);
-  EXPECT_EQ(1, merged->fromSequence);
-  EXPECT_EQ(3, merged->toSequence);
-  EXPECT_EQ(1, merged->changedFilesInOverlay.size());
-  ASSERT_EQ(1, merged->changedFilesInOverlay.count(RelativePath{"test.txt"}));
+  summed = journal.accumulateRange(1);
+  ASSERT_NE(nullptr, summed);
+  EXPECT_EQ(1, summed->fromSequence);
+  EXPECT_EQ(3, summed->toSequence);
+  EXPECT_EQ(1, summed->changedFilesInOverlay.size());
+  ASSERT_EQ(1, summed->changedFilesInOverlay.count(RelativePath{"test.txt"}));
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedBefore);
   EXPECT_EQ(
       true,
-      merged->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
+      summed->changedFilesInOverlay[RelativePath{"test.txt"}].existedAfter);
 }

 TEST(Journal, destruction_does_not_overflow_stack_on_long_chain) {

@@ -558,7 +558,6 @@ void EdenServiceHandler::getFilesChangedSince(
 #ifndef _WIN32
   auto helper = INSTRUMENT_THRIFT_CALL(DBG3, *mountPoint);
   auto edenMount = server_->getMount(*mountPoint);
-  auto delta = edenMount->getJournal().getLatest();

   if (fromPosition->mountGeneration !=
       static_cast<ssize_t>(edenMount->getMountGeneration())) {
@@ -569,22 +568,29 @@ void EdenServiceHandler::getFilesChangedSince(
         "You need to compute a new basis for delta queries.");
   }

-  out.toPosition.sequenceNumber = delta->toSequence;
-  out.toPosition.snapshotHash = thriftHash(delta->toHash);
+  // The +1 is because the core merge stops at the item prior to
+  // its limitSequence parameter and we want the changes *since*
+  // the provided sequence number.
+  auto summed =
+      edenMount->getJournal().accumulateRange(fromPosition->sequenceNumber + 1);
+
+  // We set the default toPosition to be where we were if summed is null
+  out.toPosition.sequenceNumber = fromPosition->sequenceNumber;
+  out.toPosition.snapshotHash = fromPosition->snapshotHash;
   out.toPosition.mountGeneration = edenMount->getMountGeneration();

   out.fromPosition = out.toPosition;

-  // The +1 is because the core merge stops at the item prior to
-  // its limitSequence parameter and we want the changes *since*
-  // the provided sequence number.
-  auto merged = delta->merge(fromPosition->sequenceNumber + 1, true);
-  if (merged) {
-    out.fromPosition.sequenceNumber = merged->fromSequence;
-    out.fromPosition.snapshotHash = thriftHash(merged->fromHash);
+  if (summed) {
+    out.toPosition.sequenceNumber = summed->toSequence;
+    out.toPosition.snapshotHash = thriftHash(summed->toHash);
+    out.toPosition.mountGeneration = edenMount->getMountGeneration();
+
+    out.fromPosition.sequenceNumber = summed->fromSequence;
+    out.fromPosition.snapshotHash = thriftHash(summed->fromHash);
     out.fromPosition.mountGeneration = out.toPosition.mountGeneration;

-    for (const auto& entry : merged->changedFilesInOverlay) {
+    for (const auto& entry : summed->changedFilesInOverlay) {
       auto& path = entry.first;
       auto& changeInfo = entry.second;

       if (changeInfo.isNew()) {
@@ -594,7 +600,7 @@ void EdenServiceHandler::getFilesChangedSince(
       }
     }

-    for (auto& path : merged->uncleanPaths) {
+    for (auto& path : summed->uncleanPaths) {
       out.uncleanPaths.emplace_back(path.stringPiece().str());
     }
   }
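
The +1 in getFilesChangedSince follows from accumulateRange's contract: it sums every delta whose toSequence >= limitSequence, so a client that last saw sequence N asks for N + 1 to get changes strictly after N. A small runnable sketch with stand-in declarations (not eden's real headers):

  #include <cstdint>
  #include <iostream>
  #include <memory>

  // Stand-in for the JournalDeltaRange summary introduced in this diff.
  struct JournalDeltaRange {
    uint64_t fromSequence = 0;
    uint64_t toSequence = 0;
  };

  // Stand-in journal holding deltas 1..latest, mimicking the contract: sum
  // every delta with toSequence >= limitSequence, else return nullptr.
  std::unique_ptr<JournalDeltaRange> accumulateRange(
      uint64_t latest, uint64_t limitSequence) {
    if (latest < limitSequence) {
      return nullptr; // nothing at or past the limit
    }
    auto r = std::make_unique<JournalDeltaRange>();
    r->fromSequence = limitSequence;
    r->toSequence = latest;
    return r;
  }

  int main() {
    uint64_t lastSeen = 5; // the client's fromPosition.sequenceNumber
    // The +1 requests changes strictly *after* what the client already has.
    auto summed = accumulateRange(/*latest=*/5, lastSeen + 1);
    // latest == lastSeen, so nothing newer exists; as in the handler above,
    // the caller then leaves toPosition/fromPosition at the client's position.
    std::cout << (summed ? "changes" : "up to date") << "\n";
  }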