update the in-memory snapshot correctly after a commit

Summary:
This fixes "hg commit" so that it correctly updates the in-memory snapshot.
This has been broken ever since I added the in-memory snapshot when
implementing checkout().  The existing scmMarkCommitted() method updated only
the Dirstate object and the on-disk SNAPSHOT file.

This diff fixes checkout() and resetCommit() to clear the Dirstate user
directives correctly, and then replaces calls to scmMarkCommitted() with
resetCommit().

Reviewed By: bolinfest

Differential Revision: D4935943

fbshipit-source-id: 5ffcfd5db99f30c730ede202c5e013afa682bac9
This commit is contained in:
Adam Simpkins 2017-04-24 17:58:43 -07:00 committed by Facebook Github Bot
parent 5da361f55b
commit 87cbfe142b
11 changed files with 156 additions and 136 deletions

View File

@ -32,8 +32,11 @@
#include "eden/fs/store/ObjectStore.h"
#include "eden/fs/store/ObjectStores.h"
using folly::Future;
using folly::makeFuture;
using folly::StringKeyedUnorderedMap;
using folly::StringPiece;
using folly::Unit;
using facebook::eden::overlay::UserStatusDirective;
namespace {
@ -409,7 +412,7 @@ Dirstate::~Dirstate() {}
ThriftHgStatus Dirstate::getStatus(bool listIgnored) const {
ThriftStatusCallback callback(*userDirectives_.rlock());
mount_->diff(&callback, listIgnored);
mount_->diff(&callback, listIgnored).get();
return callback.extractStatus();
}
@ -1189,82 +1192,40 @@ InodePtr Dirstate::getInodeBaseOrNull(RelativePathPiece path) const {
}
}
void Dirstate::markCommitted(
Hash commitID,
const std::vector<RelativePathPiece>& pathsToClean,
const std::vector<RelativePathPiece>& pathsToDrop) {
// First, we update the root tree hash (as well as the hashes of other
// directories in the overlay). Currently, this is stored in two places: in
// the OverlayDir.treeHash for the root tree and in the SNAPSHOT file.
// In practice, the SNAPSHOT file is almost always ignored in code, so it
// mainly exists for debugging. Ultimately, we will probably drop the SNAPSHOT
// file.
auto objectStore = mount_->getObjectStore();
// Note: we immediately wait on the Future here.
// It may take a fairly long time to import the Tree.
auto treeForCommit = objectStore->getTreeForCommit(commitID).get();
if (treeForCommit == nullptr) {
throw std::runtime_error(folly::sformat(
"Cannot mark committed because commit for {} cannot be found.",
commitID.toString()));
}
Future<Unit> Dirstate::onSnapshotChanged(const Tree* rootTree) {
auto* objectStore = mount_->getObjectStore();
// Perform a depth-first traversal of directories in the overlay and update
// the treeHash, as appropriate.
auto overlay = mount_->getOverlay();
auto toIgnore = std::unordered_set<RelativePathPiece>{
RelativePathPiece{".hg"}, RelativePathPiece{kDotEdenName}};
auto modifiedDirectories = getModifiedDirectoriesForMount(mount_, &toIgnore);
for (auto& directory : modifiedDirectories) {
auto treeForDirectory =
getTreeForDirectory(directory, treeForCommit.get(), objectStore);
if (treeForDirectory == nullptr) {
// This could happen because directory is:
// - .hg
// - a bind mount
// - an ignored directory.
continue;
}
auto treeInode = mount_->getTreeInodeBlocking(directory);
DCHECK(treeInode.get() != nullptr) << "Failed to get a TreeInode for "
<< directory;
auto dir = treeInode->getContents().wlock();
dir->treeHash = treeForDirectory->getHash();
overlay->saveOverlayDir(treeInode->getNodeId(), &*dir);
}
// Now that the hashes are written, we update the userDirectives.
{
auto userDirectives = userDirectives_.wlock();
// Do we need to do anything in the overlay at the end of this?
for (auto& path : pathsToClean) {
VLOG(1) << "calling clean on " << path;
auto numErased = userDirectives->erase(path.copy());
if (numErased == 0) {
VLOG(1)
<< "Was supposed to mark path " << path
<< " clean in the dirstate, but was not in userDirectives. "
<< "This is expected if the path was modified rather than added.";
// TODO: It would be much nicer if we stored the user directives in a
// tree-like structure, so we could avoid traversing the source control Tree
// separately each time for every entry in userDirectives.
auto iter = userDirectives->begin();
bool madeChanges = false;
while (iter != userDirectives->end()) {
// If we need to erase this element, it will erase iterators pointing to
// it, but other iterators will be unaffected.
auto current = iter;
++iter;
// Check to see if this entry exists in source control now.
// TODO: We should look up the entry using a futures-based API.
auto entry = getEntryForFile(current->first, rootTree, objectStore);
auto actualStatus = entry ? overlay::UserStatusDirective::Add
: overlay::UserStatusDirective::Remove;
if (current->second == actualStatus) {
userDirectives->erase(current);
madeChanges = true;
}
}
for (auto& path : pathsToDrop) {
VLOG(1) << "calling drop on " << path;
auto numErased = userDirectives->erase(path.copy());
if (numErased == 0) {
VLOG(1) << "Was supposed to drop path " << path
<< " in the dirstate, but was not in userDirectives.";
}
if (madeChanges) {
persistence_.save(*userDirectives);
}
persistence_.save(*userDirectives);
}
// With respect to maintaining consistency, this is the least important I/O
// that we do, so we do it last.
mount_->getConfig()->setSnapshotID(commitID);
return makeFuture();
}
const char kStatusCodeCharClean = 'C';

View File

@ -168,18 +168,12 @@ class Dirstate {
std::vector<DirstateAddRemoveError>* errorsToReport);
/**
* Called as part of `hg commit`, so this does three things (ideally
* atomically):
* 1. Updates the hashes in the Overlay.
* 3. Updates SNAPSHOT to the commitID.
* 4. Applies the changes represented by pathsToClean and pathsToDrop to the
* dirstate. Note that this may not clear the dirstate altogether if the
* user has done `hg commit <specific-files>` or `hg commit -i`.
* Clean up the Dirstate after the current commit has changed.
*
* This removes Add and Remove directives if the corresponding files have
* been added or removed in the new source control state.
*/
void markCommitted(
Hash commitID,
const std::vector<RelativePathPiece>& pathsToClean,
const std::vector<RelativePathPiece>& pathsToDrop);
folly::Future<folly::Unit> onSnapshotChanged(const Tree* rootTree);
private:
/**

View File

@ -231,9 +231,21 @@ folly::Future<std::vector<CheckoutConflict>> EdenMount::checkout(
std::tuple<unique_ptr<Tree>, unique_ptr<Tree>> treeResults) {
auto& fromTree = std::get<0>(treeResults);
auto& toTree = std::get<1>(treeResults);
// TODO: We should change the code to use shared_ptr<Tree>.
// The ObjectStore should always return shared_ptrs so it can cache
// them if we want to do so in the future.
auto toTreeCopy = make_unique<Tree>(*toTree);
ctx->start(this->acquireRenameLock());
return this->getRootInode()->checkout(
ctx.get(), std::move(fromTree), std::move(toTree));
return this->getRootInode()
->checkout(ctx.get(), std::move(fromTree), std::move(toTree))
.then([toTreeCopy = std::move(toTreeCopy)]() mutable {
return std::move(toTreeCopy);
});
})
.then([this](std::unique_ptr<Tree> toTree) {
return dirstate_->onSnapshotChanged(toTree.get());
})
.then([this, ctx, oldSnapshot, snapshotHash]() {
// Save the new snapshot hash
@ -283,22 +295,37 @@ Future<Unit> EdenMount::diff(InodeDiffCallback* callback, bool listIgnored) {
.ensure(std::move(stateHolder));
}
void EdenMount::resetCommit(Hash snapshotHash) {
// We currently don't verify that snapshotHash refers to a valid commit
// in the ObjectStore. We could do that just for verification purposes.
Future<Unit> EdenMount::resetCommit(Hash snapshotHash) {
// Hold the snapshot lock around the entire operation.
auto snapshotLock = currentSnapshot_.wlock();
auto oldSnapshot = *snapshotLock;
VLOG(1) << "resetting snapshot for " << this->getPath() << " from "
<< oldSnapshot << " to " << snapshotHash;
*snapshotLock = snapshotHash;
this->config_->setSnapshotID(snapshotHash);
auto journalDelta = make_unique<JournalDelta>();
journalDelta->fromHash = oldSnapshot;
journalDelta->toHash = snapshotHash;
journal_.wlock()->addDelta(std::move(journalDelta));
// TODO: Maybe we should walk the inodes and see if we can dematerialize some
// files using the new source control state.
//
// It probably makes sense to do this if/when we convert the Dirstate user
// directives into a tree-like data structure.
return objectStore_->getTreeForCommit(snapshotHash)
.then([this](std::unique_ptr<Tree> rootTree) {
return dirstate_->onSnapshotChanged(rootTree.get());
})
.then([
this,
snapshotHash,
oldSnapshot,
snapshotLock = std::move(snapshotLock)
]() {
*snapshotLock = snapshotHash;
this->config_->setSnapshotID(snapshotHash);
auto journalDelta = make_unique<JournalDelta>();
journalDelta->fromHash = oldSnapshot;
journalDelta->toHash = snapshotHash;
journal_.wlock()->addDelta(std::move(journalDelta));
});
}
RenameLock EdenMount::acquireRenameLock() {

View File

@ -9,6 +9,7 @@
*/
#pragma once
#include <folly/Portability.h>
#include <folly/SharedMutex.h>
#include <folly/Synchronized.h>
#include <folly/ThreadLocal.h>
@ -239,16 +240,25 @@ class EdenMount {
* @param listIgnored Whether or not to inform the callback of ignored files.
* When listIgnored to false can speed up the diff computation, as the
* code does not need to descend into ignord directories at all.
*
* @return Returns a folly::Future that will be fulfilled when the diff
* operation is complete. This is marked FOLLY_WARN_UNUSED_RESULT to
* make sure callers do not forget to wait for the operatio to complete.
*/
folly::Future<folly::Unit> diff(
FOLLY_WARN_UNUSED_RESULT folly::Future<folly::Unit> diff(
InodeDiffCallback* callback,
bool listIgnored = false);
/**
* Reset the state to point to the specified commit, without modifying
* the working directory contents at all.
*
* @return Returns a folly::Future that will be fulfilled when the operation
* is complete. This is marked FOLLY_WARN_UNUSED_RESULT to make sure
* callers do not forget to wait for the operation to complete.
*/
void resetCommit(Hash snapshotHash);
FOLLY_WARN_UNUSED_RESULT folly::Future<folly::Unit> resetCommit(
Hash snapshotHash);
/**
* Acquire the rename lock in exclusive mode.

View File

@ -382,7 +382,7 @@ void testModifyConflict(
// resetCommit(), as the files will be materialized this way.
auto commit1 = testMount.getBackingStore()->putCommit("a", builder1);
commit1->setReady();
testMount.getEdenMount()->resetCommit(makeTestHash("a"));
testMount.getEdenMount()->resetCommit(makeTestHash("a")).get();
// Prepare the destination tree
auto builder2 = builder1.clone();

View File

@ -54,7 +54,7 @@ TEST(EdenMount, resetCommit) {
EXPECT_FALSE(testMount.hasFileAt("src/extra.h"));
// Reset the TestMount to pointing to commit2
edenMount->resetCommit(makeTestHash("2"));
edenMount->resetCommit(makeTestHash("2")).get();
// The snapshot ID should be updated, both in memory and on disk
EXPECT_EQ(makeTestHash("2"), edenMount->getSnapshotID());
EXPECT_EQ(makeTestHash("2"), edenMount->getConfig()->getSnapshotID());

View File

@ -194,7 +194,7 @@ void EdenServiceHandler::resetParentCommit(
std::unique_ptr<std::string> hash) {
auto hashObj = hashFromThrift(*hash);
auto edenMount = server_->getMount(*mountPoint);
edenMount->resetCommit(hashObj);
edenMount->resetCommit(hashObj).get();
}
void EdenServiceHandler::getSHA1(
@ -443,31 +443,6 @@ void EdenServiceHandler::scmRemove(
}
}
void EdenServiceHandler::scmMarkCommitted(
std::unique_ptr<std::string> mountPoint,
std::unique_ptr<std::string> commitID,
std::unique_ptr<std::vector<std::string>> pathsToCleanAsStrings,
std::unique_ptr<std::vector<std::string>> pathsToDropAsStrings) {
auto dirstate = server_->getMount(*mountPoint)->getDirstate();
DCHECK(dirstate != nullptr) << "Failed to get dirstate for "
<< mountPoint.get();
auto hash = hashFromThrift(*commitID);
std::vector<RelativePathPiece> pathsToClean;
pathsToClean.reserve(pathsToCleanAsStrings->size());
for (auto& path : *pathsToCleanAsStrings.get()) {
pathsToClean.emplace_back(RelativePathPiece(path));
}
std::vector<RelativePathPiece> pathsToDrop;
pathsToDrop.reserve(pathsToDropAsStrings->size());
for (auto& path : *pathsToDropAsStrings.get()) {
pathsToDrop.emplace_back(RelativePathPiece(path));
}
dirstate->markCommitted(hash, pathsToClean, pathsToDrop);
}
void EdenServiceHandler::debugGetScmTree(
vector<ScmTreeEntry>& entries,
unique_ptr<string> mountPoint,

View File

@ -104,12 +104,6 @@ class EdenServiceHandler : virtual public StreamingEdenServiceSvIf,
std::unique_ptr<std::vector<std::string>> paths,
bool force) override;
void scmMarkCommitted(
std::unique_ptr<std::string> mountPoint,
std::unique_ptr<std::string> commitID,
std::unique_ptr<std::vector<std::string>> pathsToClear,
std::unique_ptr<std::vector<std::string>> pathsToDrop) override;
void debugGetScmTree(
std::vector<ScmTreeEntry>& entries,
std::unique_ptr<std::string> mountPoint,

View File

@ -333,13 +333,6 @@ service EdenService extends fb303.FacebookService {
3: bool force
) throws (1: EdenError ex)
void scmMarkCommitted(
1: string mountPoint,
2: binary commitID,
3: list<string> pathsToClean,
4: list<string> pathsToDrop,
) throws (1: EdenError ex)
//////// Debugging APIs ////////
/**

View File

@ -165,7 +165,14 @@ void TestMount::resetCommit(
auto* storedCommit =
backingStore_->putCommit(commitHash, rootTree->get().getHash());
storedCommit->setReady();
edenMount_->resetCommit(commitHash);
// The root tree needs to be made ready too, even if setReady is false.
// resetCommit() won't return until until the root tree can be loaded.
if (!setReady) {
rootTree->setReady();
}
edenMount_->resetCommit(commitHash).get();
}
void TestMount::setInitialCommit(Hash commitHash) {

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python3
#
# Copyright (c) 2004-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import os
from .lib.hg_extension_test_base import HgExtensionTestBase
class CommitTest(HgExtensionTestBase):
def populate_backing_repo(self, repo):
repo.write_file('hello.txt', 'hola')
repo.write_file('foo/bar.txt', 'test\n')
repo.write_file('foo/subdir/test.txt', 'test\n')
self.commit1 = repo.commit('Initial commit.\n')
def test_commit_modification(self):
'''Test committing a modification to an existing file'''
self.assert_status_empty()
self.write_file('foo/bar.txt', 'test version 2\n')
self.assert_status({'foo/bar.txt': 'M'})
commit2 = self.repo.commit('Updated bar.txt\n')
self.assertNotEqual(self.commit1, commit2)
self.assert_status_empty()
self.assertEqual('test version 2\n', self.read_file('foo/bar.txt'))
def test_commit_new_file(self):
'''Test committing a new file'''
self.assert_status_empty()
self.write_file('foo/new.txt', 'new and improved\n')
self.assert_status({'foo/new.txt': '?'})
self.hg('add', 'foo/new.txt')
self.assert_status({'foo/new.txt': 'A'})
commit2 = self.repo.commit('Added new.txt\n')
self.assertNotEqual(self.commit1, commit2)
self.assert_status_empty()
self.assertEqual('new and improved\n', self.read_file('foo/new.txt'))
def test_commit_remove_file(self):
'''Test a commit that removes a file'''
self.assert_status_empty()
self.hg('rm', 'foo/subdir/test.txt')
self.assertFalse(os.path.exists(self.get_path('foo/subdir/test.txt')))
self.assert_status({'foo/subdir/test.txt': 'R'})
commit2 = self.repo.commit('Removed test.txt\n')
self.assertNotEqual(self.commit1, commit2)
self.assert_status_empty()
self.assertFalse(os.path.exists(self.get_path('foo/subdir/test.txt')))