From 87cbfe142b10b6a1ceaed5d1d86139679a53c3fc Mon Sep 17 00:00:00 2001 From: Adam Simpkins Date: Mon, 24 Apr 2017 17:58:43 -0700 Subject: [PATCH] update the in-memory snapshot correctly after a commit Summary: This fixes "hg commit" so that it correctly updates the in-memory snapshot. This has been broken ever since I added the in-memory snapshot when implementing checkout(). The existing scmMarkCommitted() method updated only the Dirstate object and the on-disk SNAPSHOT file. This diff fixes checkout() and resetCommit() to clear the Dirstate user directives correctly, and then replaces calls to scmMarkCommitted() with resetCommit(). Reviewed By: bolinfest Differential Revision: D4935943 fbshipit-source-id: 5ffcfd5db99f30c730ede202c5e013afa682bac9 --- eden/fs/inodes/Dirstate.cpp | 97 ++++++++------------------ eden/fs/inodes/Dirstate.h | 16 ++--- eden/fs/inodes/EdenMount.cpp | 53 ++++++++++---- eden/fs/inodes/EdenMount.h | 14 +++- eden/fs/inodes/test/CheckoutTest.cpp | 2 +- eden/fs/inodes/test/EdenMountTest.cpp | 2 +- eden/fs/service/EdenServiceHandler.cpp | 27 +------ eden/fs/service/EdenServiceHandler.h | 6 -- eden/fs/service/eden.thrift | 7 -- eden/fs/testharness/TestMount.cpp | 9 ++- eden/integration/hg/commit_test.py | 59 ++++++++++++++++ 11 files changed, 156 insertions(+), 136 deletions(-) create mode 100644 eden/integration/hg/commit_test.py diff --git a/eden/fs/inodes/Dirstate.cpp b/eden/fs/inodes/Dirstate.cpp index 9fa71cf933..fd9bef55af 100644 --- a/eden/fs/inodes/Dirstate.cpp +++ b/eden/fs/inodes/Dirstate.cpp @@ -32,8 +32,11 @@ #include "eden/fs/store/ObjectStore.h" #include "eden/fs/store/ObjectStores.h" +using folly::Future; +using folly::makeFuture; using folly::StringKeyedUnorderedMap; using folly::StringPiece; +using folly::Unit; using facebook::eden::overlay::UserStatusDirective; namespace { @@ -409,7 +412,7 @@ Dirstate::~Dirstate() {} ThriftHgStatus Dirstate::getStatus(bool listIgnored) const { ThriftStatusCallback callback(*userDirectives_.rlock()); - mount_->diff(&callback, listIgnored); + mount_->diff(&callback, listIgnored).get(); return callback.extractStatus(); } @@ -1189,82 +1192,40 @@ InodePtr Dirstate::getInodeBaseOrNull(RelativePathPiece path) const { } } -void Dirstate::markCommitted( - Hash commitID, - const std::vector& pathsToClean, - const std::vector& pathsToDrop) { - // First, we update the root tree hash (as well as the hashes of other - // directories in the overlay). Currently, this is stored in two places: in - // the OverlayDir.treeHash for the root tree and in the SNAPSHOT file. - // In practice, the SNAPSHOT file is almost always ignored in code, so it - // mainly exists for debugging. Ultimately, we will probably drop the SNAPSHOT - // file. - auto objectStore = mount_->getObjectStore(); - // Note: we immediately wait on the Future here. - // It may take a fairly long time to import the Tree. - auto treeForCommit = objectStore->getTreeForCommit(commitID).get(); - if (treeForCommit == nullptr) { - throw std::runtime_error(folly::sformat( - "Cannot mark committed because commit for {} cannot be found.", - commitID.toString())); - } +Future Dirstate::onSnapshotChanged(const Tree* rootTree) { + auto* objectStore = mount_->getObjectStore(); - // Perform a depth-first traversal of directories in the overlay and update - // the treeHash, as appropriate. - auto overlay = mount_->getOverlay(); - auto toIgnore = std::unordered_set{ - RelativePathPiece{".hg"}, RelativePathPiece{kDotEdenName}}; - auto modifiedDirectories = getModifiedDirectoriesForMount(mount_, &toIgnore); - for (auto& directory : modifiedDirectories) { - auto treeForDirectory = - getTreeForDirectory(directory, treeForCommit.get(), objectStore); - if (treeForDirectory == nullptr) { - // This could happen because directory is: - // - .hg - // - a bind mount - // - an ignored directory. - continue; - } - - auto treeInode = mount_->getTreeInodeBlocking(directory); - DCHECK(treeInode.get() != nullptr) << "Failed to get a TreeInode for " - << directory; - - auto dir = treeInode->getContents().wlock(); - dir->treeHash = treeForDirectory->getHash(); - overlay->saveOverlayDir(treeInode->getNodeId(), &*dir); - } - - // Now that the hashes are written, we update the userDirectives. { auto userDirectives = userDirectives_.wlock(); - // Do we need to do anything in the overlay at the end of this? - for (auto& path : pathsToClean) { - VLOG(1) << "calling clean on " << path; - auto numErased = userDirectives->erase(path.copy()); - if (numErased == 0) { - VLOG(1) - << "Was supposed to mark path " << path - << " clean in the dirstate, but was not in userDirectives. " - << "This is expected if the path was modified rather than added."; + + // TODO: It would be much nicer if we stored the user directives in a + // tree-like structure, so we could avoid traversing the source control Tree + // separately each time for every entry in userDirectives. + auto iter = userDirectives->begin(); + bool madeChanges = false; + while (iter != userDirectives->end()) { + // If we need to erase this element, it will erase iterators pointing to + // it, but other iterators will be unaffected. + auto current = iter; + ++iter; + + // Check to see if this entry exists in source control now. + // TODO: We should look up the entry using a futures-based API. + auto entry = getEntryForFile(current->first, rootTree, objectStore); + auto actualStatus = entry ? overlay::UserStatusDirective::Add + : overlay::UserStatusDirective::Remove; + if (current->second == actualStatus) { + userDirectives->erase(current); + madeChanges = true; } } - for (auto& path : pathsToDrop) { - VLOG(1) << "calling drop on " << path; - auto numErased = userDirectives->erase(path.copy()); - if (numErased == 0) { - VLOG(1) << "Was supposed to drop path " << path - << " in the dirstate, but was not in userDirectives."; - } + if (madeChanges) { + persistence_.save(*userDirectives); } - - persistence_.save(*userDirectives); } - // With respect to maintaining consistency, this is the least important I/O - // that we do, so we do it last. - mount_->getConfig()->setSnapshotID(commitID); + return makeFuture(); } const char kStatusCodeCharClean = 'C'; diff --git a/eden/fs/inodes/Dirstate.h b/eden/fs/inodes/Dirstate.h index e8aed217e6..e4915207d1 100644 --- a/eden/fs/inodes/Dirstate.h +++ b/eden/fs/inodes/Dirstate.h @@ -168,18 +168,12 @@ class Dirstate { std::vector* errorsToReport); /** - * Called as part of `hg commit`, so this does three things (ideally - * atomically): - * 1. Updates the hashes in the Overlay. - * 3. Updates SNAPSHOT to the commitID. - * 4. Applies the changes represented by pathsToClean and pathsToDrop to the - * dirstate. Note that this may not clear the dirstate altogether if the - * user has done `hg commit ` or `hg commit -i`. + * Clean up the Dirstate after the current commit has changed. + * + * This removes Add and Remove directives if the corresponding files have + * been added or removed in the new source control state. */ - void markCommitted( - Hash commitID, - const std::vector& pathsToClean, - const std::vector& pathsToDrop); + folly::Future onSnapshotChanged(const Tree* rootTree); private: /** diff --git a/eden/fs/inodes/EdenMount.cpp b/eden/fs/inodes/EdenMount.cpp index cce71287b6..45d0464b6d 100644 --- a/eden/fs/inodes/EdenMount.cpp +++ b/eden/fs/inodes/EdenMount.cpp @@ -231,9 +231,21 @@ folly::Future> EdenMount::checkout( std::tuple, unique_ptr> treeResults) { auto& fromTree = std::get<0>(treeResults); auto& toTree = std::get<1>(treeResults); + + // TODO: We should change the code to use shared_ptr. + // The ObjectStore should always return shared_ptrs so it can cache + // them if we want to do so in the future. + auto toTreeCopy = make_unique(*toTree); + ctx->start(this->acquireRenameLock()); - return this->getRootInode()->checkout( - ctx.get(), std::move(fromTree), std::move(toTree)); + return this->getRootInode() + ->checkout(ctx.get(), std::move(fromTree), std::move(toTree)) + .then([toTreeCopy = std::move(toTreeCopy)]() mutable { + return std::move(toTreeCopy); + }); + }) + .then([this](std::unique_ptr toTree) { + return dirstate_->onSnapshotChanged(toTree.get()); }) .then([this, ctx, oldSnapshot, snapshotHash]() { // Save the new snapshot hash @@ -283,22 +295,37 @@ Future EdenMount::diff(InodeDiffCallback* callback, bool listIgnored) { .ensure(std::move(stateHolder)); } -void EdenMount::resetCommit(Hash snapshotHash) { - // We currently don't verify that snapshotHash refers to a valid commit - // in the ObjectStore. We could do that just for verification purposes. - +Future EdenMount::resetCommit(Hash snapshotHash) { + // Hold the snapshot lock around the entire operation. auto snapshotLock = currentSnapshot_.wlock(); auto oldSnapshot = *snapshotLock; - VLOG(1) << "resetting snapshot for " << this->getPath() << " from " << oldSnapshot << " to " << snapshotHash; - *snapshotLock = snapshotHash; - this->config_->setSnapshotID(snapshotHash); - auto journalDelta = make_unique(); - journalDelta->fromHash = oldSnapshot; - journalDelta->toHash = snapshotHash; - journal_.wlock()->addDelta(std::move(journalDelta)); + // TODO: Maybe we should walk the inodes and see if we can dematerialize some + // files using the new source control state. + // + // It probably makes sense to do this if/when we convert the Dirstate user + // directives into a tree-like data structure. + + return objectStore_->getTreeForCommit(snapshotHash) + .then([this](std::unique_ptr rootTree) { + return dirstate_->onSnapshotChanged(rootTree.get()); + }) + .then([ + this, + snapshotHash, + oldSnapshot, + snapshotLock = std::move(snapshotLock) + ]() { + *snapshotLock = snapshotHash; + this->config_->setSnapshotID(snapshotHash); + + auto journalDelta = make_unique(); + journalDelta->fromHash = oldSnapshot; + journalDelta->toHash = snapshotHash; + journal_.wlock()->addDelta(std::move(journalDelta)); + }); } RenameLock EdenMount::acquireRenameLock() { diff --git a/eden/fs/inodes/EdenMount.h b/eden/fs/inodes/EdenMount.h index 936d1f2dd3..2fdc473b6b 100644 --- a/eden/fs/inodes/EdenMount.h +++ b/eden/fs/inodes/EdenMount.h @@ -9,6 +9,7 @@ */ #pragma once +#include #include #include #include @@ -239,16 +240,25 @@ class EdenMount { * @param listIgnored Whether or not to inform the callback of ignored files. * When listIgnored to false can speed up the diff computation, as the * code does not need to descend into ignord directories at all. + * + * @return Returns a folly::Future that will be fulfilled when the diff + * operation is complete. This is marked FOLLY_WARN_UNUSED_RESULT to + * make sure callers do not forget to wait for the operatio to complete. */ - folly::Future diff( + FOLLY_WARN_UNUSED_RESULT folly::Future diff( InodeDiffCallback* callback, bool listIgnored = false); /** * Reset the state to point to the specified commit, without modifying * the working directory contents at all. + * + * @return Returns a folly::Future that will be fulfilled when the operation + * is complete. This is marked FOLLY_WARN_UNUSED_RESULT to make sure + * callers do not forget to wait for the operation to complete. */ - void resetCommit(Hash snapshotHash); + FOLLY_WARN_UNUSED_RESULT folly::Future resetCommit( + Hash snapshotHash); /** * Acquire the rename lock in exclusive mode. diff --git a/eden/fs/inodes/test/CheckoutTest.cpp b/eden/fs/inodes/test/CheckoutTest.cpp index 361abd3ec7..0b6c5a627d 100644 --- a/eden/fs/inodes/test/CheckoutTest.cpp +++ b/eden/fs/inodes/test/CheckoutTest.cpp @@ -382,7 +382,7 @@ void testModifyConflict( // resetCommit(), as the files will be materialized this way. auto commit1 = testMount.getBackingStore()->putCommit("a", builder1); commit1->setReady(); - testMount.getEdenMount()->resetCommit(makeTestHash("a")); + testMount.getEdenMount()->resetCommit(makeTestHash("a")).get(); // Prepare the destination tree auto builder2 = builder1.clone(); diff --git a/eden/fs/inodes/test/EdenMountTest.cpp b/eden/fs/inodes/test/EdenMountTest.cpp index d4be8cd089..c2b1ee9b8b 100644 --- a/eden/fs/inodes/test/EdenMountTest.cpp +++ b/eden/fs/inodes/test/EdenMountTest.cpp @@ -54,7 +54,7 @@ TEST(EdenMount, resetCommit) { EXPECT_FALSE(testMount.hasFileAt("src/extra.h")); // Reset the TestMount to pointing to commit2 - edenMount->resetCommit(makeTestHash("2")); + edenMount->resetCommit(makeTestHash("2")).get(); // The snapshot ID should be updated, both in memory and on disk EXPECT_EQ(makeTestHash("2"), edenMount->getSnapshotID()); EXPECT_EQ(makeTestHash("2"), edenMount->getConfig()->getSnapshotID()); diff --git a/eden/fs/service/EdenServiceHandler.cpp b/eden/fs/service/EdenServiceHandler.cpp index d1f7452dac..0118389d69 100644 --- a/eden/fs/service/EdenServiceHandler.cpp +++ b/eden/fs/service/EdenServiceHandler.cpp @@ -194,7 +194,7 @@ void EdenServiceHandler::resetParentCommit( std::unique_ptr hash) { auto hashObj = hashFromThrift(*hash); auto edenMount = server_->getMount(*mountPoint); - edenMount->resetCommit(hashObj); + edenMount->resetCommit(hashObj).get(); } void EdenServiceHandler::getSHA1( @@ -443,31 +443,6 @@ void EdenServiceHandler::scmRemove( } } -void EdenServiceHandler::scmMarkCommitted( - std::unique_ptr mountPoint, - std::unique_ptr commitID, - std::unique_ptr> pathsToCleanAsStrings, - std::unique_ptr> pathsToDropAsStrings) { - auto dirstate = server_->getMount(*mountPoint)->getDirstate(); - DCHECK(dirstate != nullptr) << "Failed to get dirstate for " - << mountPoint.get(); - - auto hash = hashFromThrift(*commitID); - std::vector pathsToClean; - pathsToClean.reserve(pathsToCleanAsStrings->size()); - for (auto& path : *pathsToCleanAsStrings.get()) { - pathsToClean.emplace_back(RelativePathPiece(path)); - } - - std::vector pathsToDrop; - pathsToDrop.reserve(pathsToDropAsStrings->size()); - for (auto& path : *pathsToDropAsStrings.get()) { - pathsToDrop.emplace_back(RelativePathPiece(path)); - } - - dirstate->markCommitted(hash, pathsToClean, pathsToDrop); -} - void EdenServiceHandler::debugGetScmTree( vector& entries, unique_ptr mountPoint, diff --git a/eden/fs/service/EdenServiceHandler.h b/eden/fs/service/EdenServiceHandler.h index 20dca6391e..7b274d1f95 100644 --- a/eden/fs/service/EdenServiceHandler.h +++ b/eden/fs/service/EdenServiceHandler.h @@ -104,12 +104,6 @@ class EdenServiceHandler : virtual public StreamingEdenServiceSvIf, std::unique_ptr> paths, bool force) override; - void scmMarkCommitted( - std::unique_ptr mountPoint, - std::unique_ptr commitID, - std::unique_ptr> pathsToClear, - std::unique_ptr> pathsToDrop) override; - void debugGetScmTree( std::vector& entries, std::unique_ptr mountPoint, diff --git a/eden/fs/service/eden.thrift b/eden/fs/service/eden.thrift index 135e77527c..604927cb72 100644 --- a/eden/fs/service/eden.thrift +++ b/eden/fs/service/eden.thrift @@ -333,13 +333,6 @@ service EdenService extends fb303.FacebookService { 3: bool force ) throws (1: EdenError ex) - void scmMarkCommitted( - 1: string mountPoint, - 2: binary commitID, - 3: list pathsToClean, - 4: list pathsToDrop, - ) throws (1: EdenError ex) - //////// Debugging APIs //////// /** diff --git a/eden/fs/testharness/TestMount.cpp b/eden/fs/testharness/TestMount.cpp index e443468035..a2e3131dcd 100644 --- a/eden/fs/testharness/TestMount.cpp +++ b/eden/fs/testharness/TestMount.cpp @@ -165,7 +165,14 @@ void TestMount::resetCommit( auto* storedCommit = backingStore_->putCommit(commitHash, rootTree->get().getHash()); storedCommit->setReady(); - edenMount_->resetCommit(commitHash); + + // The root tree needs to be made ready too, even if setReady is false. + // resetCommit() won't return until until the root tree can be loaded. + if (!setReady) { + rootTree->setReady(); + } + + edenMount_->resetCommit(commitHash).get(); } void TestMount::setInitialCommit(Hash commitHash) { diff --git a/eden/integration/hg/commit_test.py b/eden/integration/hg/commit_test.py new file mode 100644 index 0000000000..26d24ce9ee --- /dev/null +++ b/eden/integration/hg/commit_test.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2004-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. + +import os + +from .lib.hg_extension_test_base import HgExtensionTestBase + + +class CommitTest(HgExtensionTestBase): + def populate_backing_repo(self, repo): + repo.write_file('hello.txt', 'hola') + repo.write_file('foo/bar.txt', 'test\n') + repo.write_file('foo/subdir/test.txt', 'test\n') + self.commit1 = repo.commit('Initial commit.\n') + + def test_commit_modification(self): + '''Test committing a modification to an existing file''' + self.assert_status_empty() + + self.write_file('foo/bar.txt', 'test version 2\n') + self.assert_status({'foo/bar.txt': 'M'}) + + commit2 = self.repo.commit('Updated bar.txt\n') + self.assertNotEqual(self.commit1, commit2) + self.assert_status_empty() + self.assertEqual('test version 2\n', self.read_file('foo/bar.txt')) + + def test_commit_new_file(self): + '''Test committing a new file''' + self.assert_status_empty() + + self.write_file('foo/new.txt', 'new and improved\n') + self.assert_status({'foo/new.txt': '?'}) + self.hg('add', 'foo/new.txt') + self.assert_status({'foo/new.txt': 'A'}) + + commit2 = self.repo.commit('Added new.txt\n') + self.assertNotEqual(self.commit1, commit2) + self.assert_status_empty() + self.assertEqual('new and improved\n', self.read_file('foo/new.txt')) + + def test_commit_remove_file(self): + '''Test a commit that removes a file''' + self.assert_status_empty() + + self.hg('rm', 'foo/subdir/test.txt') + self.assertFalse(os.path.exists(self.get_path('foo/subdir/test.txt'))) + self.assert_status({'foo/subdir/test.txt': 'R'}) + + commit2 = self.repo.commit('Removed test.txt\n') + self.assertNotEqual(self.commit1, commit2) + self.assert_status_empty() + self.assertFalse(os.path.exists(self.get_path('foo/subdir/test.txt')))