have Overlay track nextInodeNumber_ instead of InodeMap

Summary:
The Overlay is the natural home for nextInodeNumber_ now that every
directory allocates inode numbers for its children right away. This
also simplifies serializing nextInodeNumber_ to disk in the following
diff.

Reviewed By: simpkins

Differential Revision: D8192442

fbshipit-source-id: 9b776a73c8d7653002b55985d592b1746e52f878
This commit is contained in:
Chad Austin 2018-05-31 01:40:43 -07:00 committed by Facebook Github Bot
parent d7766f9cce
commit 5409f230eb
9 changed files with 153 additions and 123 deletions

View File

@ -191,12 +191,13 @@ folly::Future<folly::Unit> EdenMount::initialize(
// Do this before the root TreeInode is allocated in case it needs to allocate
// any inode numbers.
if (!takeover) {
auto maxInodeNumber = overlay_->getMaxRecordedInode();
auto maxInodeNumber = overlay_->scanForNextInodeNumber();
XLOG(DBG2) << "Initializing eden mount " << getPath()
<< "; max existing inode number is " << maxInodeNumber;
inodeMap_->setMaximumExistingInodeNumber(maxInodeNumber);
} else {
XLOG(DBG2) << "Initializing eden mount " << getPath() << " from takeover";
overlay_->setNextInodeNumber(
InodeNumber::fromThrift(takeover->nextInodeNumber));
}
return createRootInode(*parents).then(
@ -220,7 +221,7 @@ folly::Future<folly::Unit> EdenMount::initialize(
folly::Future<TreeInodePtr> EdenMount::createRootInode(
const ParentCommits& parentCommits) {
// Load the overlay, if present.
auto rootOverlayDir = overlay_->loadOverlayDir(kRootNodeId, getInodeMap());
auto rootOverlayDir = overlay_->loadOverlayDir(kRootNodeId);
if (rootOverlayDir) {
return TreeInodePtr::makeNew(
this, std::move(rootOverlayDir->first), rootOverlayDir->second);
@ -354,7 +355,9 @@ EdenMount::shutdownImpl(bool doTakeover) {
// This is important during graceful restart to ensure that we have
// released the lock before the new edenfs process begins to take over
// the mount piont.
overlay_->close();
inodeMap.nextInodeNumber = overlay_->close();
CHECK(inodeMap.nextInodeNumber)
<< "nextInodeNumber should always be nonzero";
state_.store(State::SHUT_DOWN);
return std::make_tuple(fileHandleMap, inodeMap);
});

View File

@ -107,12 +107,6 @@ InodeMap::~InodeMap() {
// destroy the EdenMount.
}
void InodeMap::setMaximumExistingInodeNumber(InodeNumber max) {
DCHECK_GE(max, kRootNodeId);
auto previous = nextInodeNumber_.exchange(max.get() + 1);
DCHECK_EQ(0, previous) << "setMaximumExistingInodeNumber called twice";
}
void InodeMap::initialize(TreeInodePtr root) {
auto data = data_.wlock();
CHECK(!root_);
@ -130,14 +124,11 @@ void InodeMap::initializeFromTakeover(
<< "cannot load InodeMap data over a populated instance";
CHECK_EQ(data->unloadedInodes_.size(), 0)
<< "cannot load InodeMap data over a populated instance";
CHECK_EQ(nextInodeNumber_.load(), 0)
<< "cannot load InodeMap data over a populated instance";
CHECK(!root_);
root_ = std::move(root);
auto ret = data->loadedInodes_.emplace(kRootNodeId, root_.get());
CHECK(ret.second);
nextInodeNumber_.store(takeover.nextInodeNumber);
for (const auto& entry : takeover.unloadedInodes) {
if (entry.numFuseReferences < 0) {
auto message = folly::to<std::string>(
@ -171,8 +162,8 @@ void InodeMap::initializeFromTakeover(
}
XLOG(DBG2) << "InodeMap initialized mount " << mount_->getPath()
<< " from takeover: nextInodeNumber=" << nextInodeNumber_.load()
<< ", " << data->unloadedInodes_.size() << " inodes registered";
<< " from takeover, " << data->unloadedInodes_.size()
<< " inodes registered";
}
Future<InodePtr> InodeMap::lookupInode(InodeNumber number) {
@ -623,9 +614,6 @@ Future<SerializedInodeMap> InodeMap::shutdown(bool doTakeover) {
}
SerializedInodeMap result;
// Therefore, at this point, nobody is calling allocateInodeNumber(), so
// it's safe to read nextInodeNumber_.
result.nextInodeNumber = nextInodeNumber_.load();
XLOG(DBG5) << "InodeMap::save nextInodeNumber: " << result.nextInodeNumber;
result.unloadedInodes.reserve(data->unloadedInodes_.size());
for (const auto& it : data->unloadedInodes_) {
@ -881,8 +869,6 @@ bool InodeMap::shouldLoadChild(
InodeNumber childInode,
folly::Promise<InodePtr> promise) {
auto data = data_.wlock();
// This is a sanity check - no big deal if we race with allocateInodeNumber.
CHECK_LT(childInode.get(), nextInodeNumber_.load());
auto iter = data->unloadedInodes_.find(childInode);
UnloadedInode* unloadedData{nullptr};
if (iter == data->unloadedInodes_.end()) {
@ -908,25 +894,6 @@ bool InodeMap::shouldLoadChild(
return isFirstPromise;
}
InodeNumber InodeMap::allocateInodeNumber() {
// InodeNumber should generally be 64-bits wide, in which case it isn't even
// worth bothering to handle the case where nextInodeNumber_ wraps. We don't
// need to bother checking for conflicts with existing inode numbers since
// this can only happen if we wrap around. We don't currently support
// platforms with 32-bit inode numbers.
static_assert(
sizeof(nextInodeNumber_) == sizeof(InodeNumber),
"expected nextInodeNumber_ and InodeNumber to have the same size");
static_assert(
sizeof(InodeNumber) >= 8, "expected InodeNumber to be at least 64 bits");
// This could be a relaxed atomic operation. It doesn't matter on x86 but
// might on ARM.
auto previous = nextInodeNumber_++;
DCHECK_NE(0, previous) << "allocateInodeNumber called before initialize";
return InodeNumber{previous};
}
void InodeMap::inodeCreated(const InodePtr& inode) {
XLOG(DBG4) << "created new inode " << inode->getNodeId() << ": "
<< inode->getLogPath();

View File

@ -108,18 +108,6 @@ class InodeMap {
InodeMap(InodeMap&&) = default;
InodeMap& operator=(InodeMap&&) = default;
/**
* Set the maximum existing inode number. This must be called prior to
* calling allocateInodeNumber() and must not be called twice.
*
* @param max The maximum inode number currently assigned to
* any inode in the filesystem. For newly created file systems this
* should be FUSE_ROOT_ID. If this is a mount point that has been
* mounted before, this should be the maximum value across all the
* outstanding inodes.
*/
void setMaximumExistingInodeNumber(InodeNumber max);
/**
* Initialize the InodeMap
*
@ -386,25 +374,6 @@ class InodeMap {
InodeNumber number,
const folly::exception_wrapper& exception);
/**
* allocateInodeNumber() should only be called by TreeInode.
*
* This can be called:
* - To allocate an inode number for an existing tree entry that does not
* need to be loaded yet.
* - To allocate an inode number for a brand new inode being created by
* TreeInode::create() or TreeInode::mkdir(). In this case
* inodeCreated() should be called immediately afterwards to register the
* new child Inode object.
*
* It is illegal to call allocateInodeNumber prior to
* setMaximumExistingInodeNumber.
*
* TODO: It would be easy to extend this function to allocate a range of
* inode values in one atomic operation.
*/
InodeNumber allocateInodeNumber();
void inodeCreated(const InodePtr& inode);
struct LoadedInodeCounts {
@ -659,12 +628,6 @@ class InodeMap {
* the InodeMap while holding their own lock.)
*/
folly::Synchronized<Members> data_;
/**
* The next inode number to allocate. Zero indicates that
* setMaximumExistingInodeNumber has not been called.
*/
std::atomic<uint64_t> nextInodeNumber_{0};
};
/**

View File

@ -90,11 +90,11 @@ Overlay::~Overlay() {
close();
}
void Overlay::close() {
uint64_t Overlay::close() {
CHECK_NE(std::this_thread::get_id(), gcThread_.get_id());
if (!infoFile_) {
return;
return nextInodeNumber_.load(std::memory_order_relaxed);
}
// Make sure everything is shut down in reverse of construction order.
@ -106,6 +106,8 @@ void Overlay::close() {
inodeMetadataTable_.reset();
dirFile_.close();
infoFile_.close();
return nextInodeNumber_.load(std::memory_order_relaxed);
}
void Overlay::initOverlay() {
@ -212,11 +214,39 @@ void Overlay::initNewOverlay() {
auto infoPath = localDir_ + PathComponentPiece{kInfoFile};
folly::writeFileAtomic(
infoPath.stringPiece(), ByteRange(infoHeader.data(), infoHeader.size()));
// kRootNodeId is reserved - start at the next one.
setNextInodeNumber(InodeNumber{kRootNodeId.get() + 1});
}
void Overlay::setNextInodeNumber(InodeNumber nextInodeNumber) {
DCHECK_GT(nextInodeNumber, kRootNodeId);
auto previous = nextInodeNumber_.exchange(nextInodeNumber.get());
DCHECK_EQ(0, previous) << "setNextInodeNumber called after "
"nextInodeNumber_ was already initialized";
}
InodeNumber Overlay::allocateInodeNumber() {
// InodeNumber should generally be 64-bits wide, in which case it isn't even
// worth bothering to handle the case where nextInodeNumber_ wraps. We don't
// need to bother checking for conflicts with existing inode numbers since
// this can only happen if we wrap around. We don't currently support
// platforms with 32-bit inode numbers.
static_assert(
sizeof(nextInodeNumber_) == sizeof(InodeNumber),
"expected nextInodeNumber_ and InodeNumber to have the same size");
static_assert(
sizeof(InodeNumber) >= 8, "expected InodeNumber to be at least 64 bits");
// This could be a relaxed atomic operation. It doesn't matter on x86 but
// might on ARM.
auto previous = nextInodeNumber_++;
DCHECK_NE(0, previous) << "allocateInodeNumber called before initialize";
return InodeNumber{previous};
}
Optional<std::pair<TreeInode::Dir, InodeTimestamps>> Overlay::loadOverlayDir(
InodeNumber inodeNumber,
InodeMap* inodeMap) {
InodeNumber inodeNumber) {
TreeInode::Dir result;
InodeTimestamps timestamps;
auto dirData = deserializeOverlayDir(inodeNumber, timestamps);
@ -236,7 +266,7 @@ Optional<std::pair<TreeInode::Dir, InodeTimestamps>> Overlay::loadOverlayDir(
if (value.inodeNumber) {
ino = InodeNumber::fromThrift(value.inodeNumber);
} else {
ino = inodeMap->allocateInodeNumber();
ino = allocateInodeNumber();
shouldMigrateToNewFormat = true;
}
@ -351,7 +381,13 @@ bool Overlay::hasOverlayData(InodeNumber inodeNumber) {
}
}
InodeNumber Overlay::getMaxRecordedInode() {
InodeNumber Overlay::scanForNextInodeNumber() {
if (auto ino = nextInodeNumber_.load(std::memory_order_relaxed)) {
// Already defined.
CHECK_GT(ino, 1);
return InodeNumber{ino - 1};
}
// TODO: We should probably store the max inode number in the header file
// during graceful unmount. When opening an overlay we can then simply read
// back the max inode number from this file if the overlay was shut down
@ -413,6 +449,8 @@ InodeNumber Overlay::getMaxRecordedInode() {
}
}
setNextInodeNumber(InodeNumber{maxInode.get() + 1});
return maxInode;
}

View File

@ -32,6 +32,7 @@ struct InodeMetadata;
template <typename T>
class InodeTable;
using InodeMetadataTable = InodeTable<InodeMetadata>;
struct SerializedInodeMap;
/** Manages the write overlay storage area.
*
@ -61,9 +62,54 @@ class Overlay {
* InodeMetadataTable concurrently or call any other Overlay method
* concurrently with or after calling close(). The Overlay will try to detect
* this with assertions but cannot always detect concurrent access.
*
* Returns the next available InodeNumber to be passed to any process taking
* over an Eden mount.
*/
void close();
uint64_t close();
/**
* For now, this method exists to populate the next inode number from takeover
* data. Eventually, it will be unnecessary - the Overlay, upon a clean
* shutdown, will remember its next inode number in a file on disk.
*/
void setNextInodeNumber(InodeNumber nextInodeNumber);
/**
* Scans the Overlay for all inode numbers currently in use and sets the next
* inode number to the maximum plus one. Either this or setNextInodeNumber
* should be called when opening a mount point to ensure that any future
* allocated inode numbers are always greater than those already tracked in
* the overlay.
*
* Returns the maximum existing inode number.
*/
InodeNumber scanForNextInodeNumber();
/**
* allocateInodeNumber() should only be called by TreeInode.
*
* This can be called:
* - To allocate an inode number for an existing tree entry that does not
* need to be loaded yet.
* - To allocate an inode number for a brand new inode being created by
* TreeInode::create() or TreeInode::mkdir(). In this case
* inodeCreated() should be called immediately afterwards to register the
* new child Inode object.
*
* It is illegal to call allocateInodeNumber prior to
* setNextInodeNumber or scanForNextInodeNumber.
*
* TODO: It would be easy to extend this function to allocate a range of
* inode values in one atomic operation.
*/
InodeNumber allocateInodeNumber();
/**
* Returns an InodeMetadataTable for accessing and storing inode metadata.
* Owned by the Overlay so records can be removed when the Overlay discovers
* it no longer needs data for an inode or its children.
*/
InodeMetadataTable* getInodeMetadataTable() const {
return inodeMetadataTable_.get();
}
@ -73,8 +119,7 @@ class Overlay {
const TreeInode::Dir& dir,
const InodeTimestamps& timestamps);
folly::Optional<std::pair<TreeInode::Dir, InodeTimestamps>> loadOverlayDir(
InodeNumber inodeNumber,
InodeMap* inodeMap);
InodeNumber inodeNumber);
void removeOverlayData(InodeNumber inodeNumber);
@ -134,15 +179,6 @@ class Overlay {
int fd,
const InodeTimestamps& timeStamps);
/**
* Get the maximum inode number stored in the overlay.
*
* This is called when opening a mount point, to make sure that new inodes
* handed out from this point forwards are always greater than any inodes
* already tracked in the overlay.
*/
InodeNumber getMaxRecordedInode();
/**
* Constants for an header in overlay file.
*/
@ -233,6 +269,14 @@ class Overlay {
/** path to ".eden/CLIENT/local" */
const AbsolutePath localDir_;
/**
* The next inode number to allocate. Zero indicates that neither
* initializeFromTakeover nor getMaxRecordedInode have been called.
*
* This value will never be 1.
*/
std::atomic<uint64_t> nextInodeNumber_{0};
/**
* An open file descriptor to the overlay info file.
*

View File

@ -381,8 +381,7 @@ void TreeInode::loadUnlinkedChildInode(
Dir dir;
folly::Optional<InodeTimestamps> fromOverlay;
auto overlayContents =
getOverlay()->loadOverlayDir(number, getMount()->getInodeMap());
auto overlayContents = getOverlay()->loadOverlayDir(number);
if (overlayContents) {
fromOverlay = overlayContents->second;
}
@ -831,7 +830,7 @@ Overlay* TreeInode::getOverlay() const {
folly::Optional<std::pair<TreeInode::Dir, InodeTimestamps>>
TreeInode::loadOverlayDir(InodeNumber inodeNumber) const {
return getOverlay()->loadOverlayDir(inodeNumber, getInodeMap());
return getOverlay()->loadOverlayDir(inodeNumber);
}
void TreeInode::saveOverlayDir(const Dir& contents) const {
@ -856,28 +855,32 @@ TreeInode::Dir TreeInode::saveDirFromTree(
InodeNumber inodeNumber,
const Tree* tree,
EdenMount* mount) {
auto dir = buildDirFromTree(tree, mount->getInodeMap());
auto overlay = mount->getOverlay();
auto dir = buildDirFromTree(tree, overlay);
// buildDirFromTree just allocated inode numbers; they should be saved.
mount->getOverlay()->saveOverlayDir(
overlay->saveOverlayDir(
inodeNumber, dir, InodeTimestamps{mount->getLastCheckoutTime()});
return dir;
}
TreeInode::Dir TreeInode::buildDirFromTree(
const Tree* tree,
InodeMap* inodeMap) {
TreeInode::Dir TreeInode::buildDirFromTree(const Tree* tree, Overlay* overlay) {
// Now build out the Dir based on what we know.
Dir dir;
if (!tree) {
return dir;
}
// A future optimization is for this code to allocate all of the inode numbers
// at once and then dole them out, one per entry. It would reduce the number
// of atomic operations from N to 1, though if the atomic is issued with the
// other work this loop is doing it may not matter much.
dir.treeHash = tree->getHash();
for (const auto& treeEntry : tree->getTreeEntries()) {
dir.entries.emplace(
treeEntry.getName(),
modeFromTreeEntryType(treeEntry.getType()),
inodeMap->allocateInodeNumber(),
overlay->allocateInodeNumber(),
treeEntry.getHash());
}
return dir;
@ -926,8 +929,7 @@ FileInodePtr TreeInode::createImpl(
targetName = myPath.value() + name;
// Generate an inode number for this new entry.
auto* inodeMap = this->getInodeMap();
auto childNumber = inodeMap->allocateInodeNumber();
auto childNumber = getOverlay()->allocateInodeNumber();
// Create the overlay file before we insert the file into our entries map.
auto currentTime = getNow();
@ -958,7 +960,7 @@ FileInodePtr TreeInode::createImpl(
}
entry.setInode(inode.get());
inodeMap->inodeCreated(inode);
getInodeMap()->inodeCreated(inode);
auto timestamps = updateMtimeAndCtime(getNow());
saveOverlayDir(*contents, timestamps);
@ -1089,8 +1091,7 @@ TreeInodePtr TreeInode::mkdir(PathComponentPiece name, mode_t mode) {
}
// Allocate an inode number
auto* inodeMap = this->getInodeMap();
auto childNumber = inodeMap->allocateInodeNumber();
auto childNumber = getOverlay()->allocateInodeNumber();
// The mode passed in by the caller may not have the file type bits set.
// Ensure that we mark this as a directory.
@ -1118,7 +1119,7 @@ TreeInodePtr TreeInode::mkdir(PathComponentPiece name, mode_t mode) {
childTimestamps,
std::move(emptyDir));
entry.setInode(newChild.get());
inodeMap->inodeCreated(newChild);
getInodeMap()->inodeCreated(newChild);
// Save our updated overlay data
auto timestamps = updateMtimeAndCtime(now);
@ -2445,7 +2446,7 @@ unique_ptr<CheckoutAction> TreeInode::processCheckoutEntry(
contents.entries.emplace(
newScmEntry->getName(),
modeFromTreeEntryType(newScmEntry->getType()),
getInodeMap()->allocateInodeNumber(),
getOverlay()->allocateInodeNumber(),
newScmEntry->getHash());
invalidateFuseCache(newScmEntry->getName());
contentsUpdated = true;
@ -2467,7 +2468,7 @@ unique_ptr<CheckoutAction> TreeInode::processCheckoutEntry(
contents.entries.emplace(
newScmEntry->getName(),
modeFromTreeEntryType(newScmEntry->getType()),
getInodeMap()->allocateInodeNumber(),
getOverlay()->allocateInodeNumber(),
newScmEntry->getHash());
invalidateFuseCache(newScmEntry->getName());
contentsUpdated = true;
@ -2562,7 +2563,7 @@ unique_ptr<CheckoutAction> TreeInode::processCheckoutEntry(
contents.entries.erase(it);
} else {
entry = Entry{modeFromTreeEntryType(newScmEntry->getType()),
getInodeMap()->allocateInodeNumber(),
getOverlay()->allocateInodeNumber(),
newScmEntry->getHash()};
}
@ -2634,7 +2635,7 @@ Future<Unit> TreeInode::checkoutUpdateEntry(
DCHECK_EQ(newScmEntry->getName(), name);
it->second = Entry(
modeFromTreeEntryType(newScmEntry->getType()),
getInodeMap()->allocateInodeNumber(),
getOverlay()->allocateInodeNumber(),
newScmEntry->getHash());
} else {
contents->entries.erase(it);
@ -2701,7 +2702,7 @@ Future<Unit> TreeInode::checkoutUpdateEntry(
auto ret = contents->entries.emplace(
name,
modeFromTreeEntryType(newScmEntry->getType()),
parentInode->getInodeMap()->allocateInodeNumber(),
parentInode->getOverlay()->allocateInodeNumber(),
newScmEntry->getHash());
inserted = ret.second;
}

View File

@ -674,7 +674,7 @@ class TreeInode : public InodeBase {
/** Translates a Tree object from our store into a Dir object
* used to track the directory in the inode */
static Dir buildDirFromTree(const Tree* tree, InodeMap* inodeMap);
static Dir buildDirFromTree(const Tree* tree, Overlay* overlay);
/**
* Get a TreeInodePtr to ourself.

View File

@ -138,8 +138,7 @@ TEST_F(OverlayTest, roundTripThroughSaveAndLoad) {
overlay->saveOverlayDir(10_ino, dir, InodeTimestamps{});
auto result =
overlay->loadOverlayDir(10_ino, mount_.getEdenMount()->getInodeMap());
auto result = overlay->loadOverlayDir(10_ino);
ASSERT_TRUE(result);
const auto* newDir = &result->first;
@ -187,7 +186,8 @@ class RawOverlayTest : public ::testing::Test {
};
TEST_F(RawOverlayTest, max_inode_number_is_1_if_overlay_is_empty) {
EXPECT_EQ(kRootNodeId, overlay->getMaxRecordedInode());
EXPECT_EQ(kRootNodeId, overlay->scanForNextInodeNumber());
EXPECT_EQ(2_ino, overlay->allocateInodeNumber());
}
TEST_F(RawOverlayTest, remembers_max_inode_number_of_tree_inodes) {
@ -196,7 +196,7 @@ TEST_F(RawOverlayTest, remembers_max_inode_number_of_tree_inodes) {
recreate();
EXPECT_EQ(2_ino, overlay->getMaxRecordedInode());
EXPECT_EQ(2_ino, overlay->scanForNextInodeNumber());
}
TEST_F(RawOverlayTest, remembers_max_inode_number_of_tree_entries) {
@ -207,7 +207,7 @@ TEST_F(RawOverlayTest, remembers_max_inode_number_of_tree_entries) {
recreate();
EXPECT_EQ(4_ino, overlay->getMaxRecordedInode());
EXPECT_EQ(4_ino, overlay->scanForNextInodeNumber());
}
TEST_F(RawOverlayTest, remembers_max_inode_number_of_file) {
@ -219,7 +219,7 @@ TEST_F(RawOverlayTest, remembers_max_inode_number_of_file) {
recreate();
EXPECT_EQ(3_ino, overlay->getMaxRecordedInode());
EXPECT_EQ(3_ino, overlay->scanForNextInodeNumber());
}
TEST_F(RawOverlayTest, inode_numbers_not_reused_after_unclean_shutdown) {
@ -238,7 +238,7 @@ TEST_F(RawOverlayTest, inode_numbers_not_reused_after_unclean_shutdown) {
recreate();
EXPECT_EQ(5_ino, overlay->getMaxRecordedInode());
EXPECT_EQ(5_ino, overlay->scanForNextInodeNumber());
}
TEST_F(RawOverlayTest, inode_numbers_after_takeover) {
@ -265,7 +265,7 @@ TEST_F(RawOverlayTest, inode_numbers_after_takeover) {
// Ensure an inode in the overlay but not referenced by the previous session
// counts.
EXPECT_EQ(5_ino, overlay->getMaxRecordedInode());
EXPECT_EQ(5_ino, overlay->scanForNextInodeNumber());
}
} // namespace eden

View File

@ -87,3 +87,17 @@ class PersistenceTest(testcase.EdenRepoTest):
new_stat.st_ctime,
f"ctime must line up for path {path}",
)
def test_does_not_reuse_inode_numbers_after_cold_restart(self):
newdir1 = os.path.join(self.mount, "subdir", "newdir1")
os.mkdir(newdir1)
newdir_stat1 = os.lstat(newdir1)
self.eden.shutdown()
self.eden.start()
newdir2 = os.path.join(self.mount, "subdir", "newdir2")
os.mkdir(newdir2)
newdir_stat2 = os.lstat(newdir2)
self.assertGreater(newdir_stat2.st_ino, newdir_stat1.st_ino)