// sapling/eden/fs/inodes/Traverse.cpp

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This software may be used and distributed according to the terms of the
 * GNU General Public License version 2.
 */

#include "eden/fs/inodes/Traverse.h"

#include <folly/logging/xlog.h>
#include "eden/fs/inodes/EdenMount.h"
#include "eden/fs/inodes/FileInode.h"
#include "eden/fs/inodes/TreeInode.h"

namespace facebook::eden {

namespace {

std::vector<ChildEntry> parseDirContents(const DirContents& contents) {
  std::vector<ChildEntry> results;
  results.reserve(contents.size());
  for (const auto& [name, entry] : contents) {
    results.push_back(ChildEntry{
        name,
        entry.getDtype(),
        entry.getInodeNumber(),
        entry.getOptionalHash(),
        entry.getInodePtr()});
  }
  return results;
}

} // namespace

/**
 * Report one directory to `callbacks`, then descend into its child
 * directories.
 *
 * visitTreeInode() is invoked first for the directory itself. Afterwards each
 * entry in `children` is examined in order:
 *  - an entry with a loaded inode is followed only when that inode is a tree,
 *    by handing it to traverseObservedInodes();
 *  - an entry without a loaded inode is followed only when its dtype is Dir
 *    and the overlay returns a non-empty directory for its inode number; such
 *    a directory is reported with an fs refcount of 0.
 * In both cases callbacks.shouldRecurse() has to approve the entry first.
 *
 * @param overlay     overlay used to read directories that are not loaded
 * @param children    entries of the directory being visited
 * @param rootPath    path of the directory being visited
 * @param ino         inode number of the directory being visited
 * @param hash        optional object id of the directory being visited
 * @param fsRefcount  refcount forwarded to visitTreeInode()
 * @param callbacks   receiver of the visit / recursion decisions
 */
void traverseTreeInodeChildren(
    Overlay* overlay,
    const std::vector<ChildEntry>& children,
    RelativePathPiece rootPath,
    InodeNumber ino,
    const std::optional<ObjectId>& hash,
    uint64_t fsRefcount,
    TraversalCallbacks& callbacks) {
  XLOG(DBG7) << "Traversing: " << rootPath;
  callbacks.visitTreeInode(rootPath, ino, hash, fsRefcount, children);

  for (const auto& entry : children) {
    auto childPath = rootPath + entry.name;

    if (auto loadedInode = entry.loadedChild) {
      // Loaded child: only trees can be descended into.
      auto* childTree = loadedInode.asTreeOrNull();
      if (childTree && callbacks.shouldRecurse(entry)) {
        traverseObservedInodes(*childTree, childPath, callbacks);
      }
      continue;
    }

    // Unloaded child: skip anything that is not a directory.
    if (entry.dtype != dtype_t::Dir) {
      continue;
    }
    if (!callbacks.shouldRecurse(entry)) {
      continue;
    }

    // If we are able to load a child directory from the overlay, then this
    // child entry has been allocated, and can be traversed.
    auto overlayDir = overlay->loadOverlayDir(entry.ino);
    if (overlayDir.empty()) {
      continue;
    }

    traverseTreeInodeChildren(
        overlay,
        parseDirContents(overlayDir),
        childPath,
        entry.ino,
        entry.hash,
        0,
        callbacks);
  }
}

/**
 * Traverse the tree rooted at a loaded TreeInode.
 *
 * The entries and tree hash of `root` are copied out while the guard returned
 * by rlock() is alive; that guard goes out of scope before any callback runs.
 * The copy is then handed to traverseTreeInodeChildren() together with the
 * root's node id, its fs refcount and the overlay of the root's mount.
 *
 * @param root       loaded directory inode to start from
 * @param rootPath   path reported to the callbacks for `root`
 * @param callbacks  receiver of the visit / recursion decisions
 */
void traverseObservedInodes(
    const TreeInode& root,
    RelativePathPiece rootPath,
    TraversalCallbacks& callbacks) {
  std::optional<ObjectId> treeHash;
  std::vector<ChildEntry> snapshot;
  {
    auto locked = root.getContents().rlock();
    snapshot = parseDirContents(locked->entries);
    treeHash = locked->treeHash;
  }

  auto* mountOverlay = root.getMount()->getOverlay();
  const auto rootIno = root.getNodeId();
  const auto rootRefcount = root.debugGetFsRefcount();

  traverseTreeInodeChildren(
      mountOverlay,
      snapshot,
      rootPath,
      rootIno,
      treeHash,
      rootRefcount,
      callbacks);
}

} // namespace facebook::eden
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`/*`
fs: fix license header Summary: With Facebook having been renamed Meta Platforms, we need to change the license headers. Reviewed By: fanzeyi Differential Revision: D33407812 fbshipit-source-id: b11bfbbf13a48873f0cea75f212cc7b07a68fb2e 2022-01-05 01:58:22 +03:00			`* Copyright (c) Meta Platforms, Inc. and affiliates.`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`*`
			`* This software may be used and distributed according to the terms of the`
			`* GNU General Public License version 2.`
			`*/`

			`#include "eden/fs/inodes/Traverse.h"`

			`#include <folly/logging/xlog.h>`
			`#include "eden/fs/inodes/EdenMount.h"`
			`#include "eden/fs/inodes/FileInode.h"`
			`#include "eden/fs/inodes/TreeInode.h"`

			`namespace facebook::eden {`

			`namespace {`

			`std::vector<ChildEntry> parseDirContents(const DirContents& contents) {`
			`std::vector<ChildEntry> results;`
			`results.reserve(contents.size());`
			`for (const auto& [name, entry] : contents) {`
Apply clang-format update fixes Reviewed By: igorsugak Differential Revision: D25861960 fbshipit-source-id: e3c39c080429058a58cdc66d45350e5d1420f98c 2021-01-10 21:03:53 +03:00			`results.push_back(ChildEntry{`
			`name,`
			`entry.getDtype(),`
			`entry.getInodeNumber(),`
			`entry.getOptionalHash(),`
			`entry.getInodePtr()});`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`}`
			`return results;`
			`}`

			`} // namespace`

			`void traverseTreeInodeChildren(`
			`Overlay* overlay,`
			`const std::vector<ChildEntry>& children,`
			`RelativePathPiece rootPath,`
			`InodeNumber ino,`
separate out ObjectId [proxy hash removal 1/n] Summary: The goal of this stack is to remove Proxy Hash type, but to achieve that we need first to address some tech debt in Eden codebase. For the long time EdenFs had single Hash type that was used for many different use cases. One of major uses for Hash type is identifies internal EdenFs objects such as blobs, trees, and others. We seem to reach agreement that we need a different type for those identifiers, so we introduce separate ObjectId type in this diff to denote new identifier type and replace _some_ usage of Hash with ObjectId. We still retain original Hash type for other use cases. Roughly speaking, this is how this diff separates between Hash and ObjectId: ObjectId: * Everything that is stored in local store(blobs, trees, commits) Hash20: * Explicit hashes(Sha1 of the blob) * Hg identifiers: manifest id and blob hg ig For now, in this diff ObjectId has exactly same content as Hash, but this will change in the future diffs. Doing this way allows to keep diff size manageable, while migrating to new ObjectId right away would produce insanely large diff that would be both hard to make and review. There are few more things that needs to be done before we can get to the meat of removing proxy hashes: 1) Replace include Hash.h with ObjectId.h where needed 2) Remove Hash type, explicitly rename rest of Hash usages to Hash20 3) Modify content of ObjectId to support new use cases 4) Modify serialized metadata and possibly other places that assume ObjectId size is fixed and equal to Hash20 size Reviewed By: chadaustin Differential Revision: D31316477 fbshipit-source-id: 0d5e4460a461bcaac6b9fd884517e129aeaf4baf 2021-10-01 20:24:21 +03:00			`const std::optional<ObjectId>& hash,`
inodes: rename FUSE refcount to fs refcount Summary: One of the issue that EdenFS on Windows is currently facing is around invalidation during an update. In effect, EdenFS is over invalidating, which causes update to be slower than it should be, as well as EdenFS recursively triggering ProjectedFS callbacks during invalidation. Both of these are a sub-par UX. The reason this issue exist is multi-faceted. First, the update code follows the "kPreciseInodeNumberMemory" path which enforces that a directory that is present in the overlay needs to be invalidated, even if it isn't materialized. The second reason is that no reclamation is done for the overlay, combine the two and you get an update that gets both slower over time and will issue significantly more invalidation that is needed. Solving this is a bit involved. We could for instance start by reclaiming inodes from the overlay, but this wouldn't be effective as we use the fact that an inode is present in the overlay as a way to know that the file is cached in the overlay. If we reclaim from the overlay we simply won't be invalidating enough and some files will be out of date. It turns out that we already have a mechanism to track what is cached by the kernel: the fuse refcount. On Linux/macOS, everytime an inode is returned to the kernel, this refcount incremented, and the kernel then notifies us when it forgot about it, at which point the refcount can be decremented. On Windows, the rules are a bit different, and a simple flag is sufficient: set when we write a placeholder on disk (either during a directory listing, or when ProjectedFS asks for it), and unset at invalidation time during update. There is however a small snag in this plan. On Linux, the refcount starts at 0 when EdenFS starts as a mount/unmount will clear all the kernel references on the inodes. 
On Windows, the placeholder aren't disappearing when EdenFS dies or is stopped, so we need a way to scan the working copy when EdenFS starts to know which inodes should be loaded (an UnloadedInode really). The astute reader will have noticed that this last part is effectively a O(materialized) operation that needs to happen at startup, which would be fairly expensive in itself. It turns out that we really don't have choice and we need to do it regardless due to Windows not disallowing writes to the working copy when EdenFS is stopped, and thus for EdenFS to be aware of the actual state of the working copy, it needs to scan it at startup... The first step in doing all of this is to simply rename the various places that uses "fuse refcount" to "fs refcount" which is what this diff does. Reviewed By: chadaustin Differential Revision: D24716801 fbshipit-source-id: e9e6ccff14c454e9f2626fab23daeb3930554b1a 2020-11-05 04:32:07 +03:00			`uint64_t fsRefcount,`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`TraversalCallbacks& callbacks) {`
inodes: fix infinite loop in traverseTreeInodeChildren Summary: We want to load the children overlay, not its parent. In the case where only materialized files were requested, the code would infinitively recurse onto non-loaded but materialized inodes. Reviewed By: chadaustin Differential Revision: D34775507 fbshipit-source-id: ed5f4a2cba5fb3cfd03990b6b8696d65e84dde72 2022-03-11 02:32:32 +03:00			`XLOG(DBG7) << "Traversing: " << rootPath;`
inodes: rename FUSE refcount to fs refcount Summary: One of the issue that EdenFS on Windows is currently facing is around invalidation during an update. In effect, EdenFS is over invalidating, which causes update to be slower than it should be, as well as EdenFS recursively triggering ProjectedFS callbacks during invalidation. Both of these are a sub-par UX. The reason this issue exist is multi-faceted. First, the update code follows the "kPreciseInodeNumberMemory" path which enforces that a directory that is present in the overlay needs to be invalidated, even if it isn't materialized. The second reason is that no reclamation is done for the overlay, combine the two and you get an update that gets both slower over time and will issue significantly more invalidation that is needed. Solving this is a bit involved. We could for instance start by reclaiming inodes from the overlay, but this wouldn't be effective as we use the fact that an inode is present in the overlay as a way to know that the file is cached in the overlay. If we reclaim from the overlay we simply won't be invalidating enough and some files will be out of date. It turns out that we already have a mechanism to track what is cached by the kernel: the fuse refcount. On Linux/macOS, everytime an inode is returned to the kernel, this refcount incremented, and the kernel then notifies us when it forgot about it, at which point the refcount can be decremented. On Windows, the rules are a bit different, and a simple flag is sufficient: set when we write a placeholder on disk (either during a directory listing, or when ProjectedFS asks for it), and unset at invalidation time during update. There is however a small snag in this plan. On Linux, the refcount starts at 0 when EdenFS starts as a mount/unmount will clear all the kernel references on the inodes. 
On Windows, the placeholder aren't disappearing when EdenFS dies or is stopped, so we need a way to scan the working copy when EdenFS starts to know which inodes should be loaded (an UnloadedInode really). The astute reader will have noticed that this last part is effectively a O(materialized) operation that needs to happen at startup, which would be fairly expensive in itself. It turns out that we really don't have choice and we need to do it regardless due to Windows not disallowing writes to the working copy when EdenFS is stopped, and thus for EdenFS to be aware of the actual state of the working copy, it needs to scan it at startup... The first step in doing all of this is to simply rename the various places that uses "fuse refcount" to "fs refcount" which is what this diff does. Reviewed By: chadaustin Differential Revision: D24716801 fbshipit-source-id: e9e6ccff14c454e9f2626fab23daeb3930554b1a 2020-11-05 04:32:07 +03:00			`callbacks.visitTreeInode(rootPath, ino, hash, fsRefcount, children);`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`for (auto& entry : children) {`
			`auto childPath = rootPath + entry.name;`
			`if (auto child = entry.loadedChild) {`
			`if (auto* loadedTreeInode = child.asTreeOrNull()) {`
			`if (callbacks.shouldRecurse(entry)) {`
			`traverseObservedInodes(*loadedTreeInode, childPath, callbacks);`
			`}`
			`}`
			`} else {`
			`if (dtype_t::Dir == entry.dtype) {`
			`if (callbacks.shouldRecurse(entry)) {`
			`// If we are able to load a child directory from the overlay, then`
			`// this child entry has been allocated, and can be traversed.`
inodes: fix infinite loop in traverseTreeInodeChildren Summary: We want to load the children overlay, not its parent. In the case where only materialized files were requested, the code would infinitively recurse onto non-loaded but materialized inodes. Reviewed By: chadaustin Differential Revision: D34775507 fbshipit-source-id: ed5f4a2cba5fb3cfd03990b6b8696d65e84dde72 2022-03-11 02:32:32 +03:00			`auto contents = overlay->loadOverlayDir(entry.ino);`
inodes: make loadOverlayDir to return empty dir when missing Summary: We don't need to care if a directory from overlay exists or just empty. This saves the overlay implementation from keeping track of what directories are stored and which are not. This diff changes `Overlay::loadOverlayDir` to return `DirContents` instead of `std::optional`. The same method on `IOverlay` remain unchanged to give the backing overlay implementation the freedom to choose. Reviewed By: chadaustin Differential Revision: D25507406 fbshipit-source-id: f7edcc55485fabeedfe11e9f269eea15a3cc32ad 2021-03-02 20:55:38 +03:00			`if (!contents.empty()) {`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`traverseTreeInodeChildren(`
			`overlay,`
inodes: make loadOverlayDir to return empty dir when missing Summary: We don't need to care if a directory from overlay exists or just empty. This saves the overlay implementation from keeping track of what directories are stored and which are not. This diff changes `Overlay::loadOverlayDir` to return `DirContents` instead of `std::optional`. The same method on `IOverlay` remain unchanged to give the backing overlay implementation the freedom to choose. Reviewed By: chadaustin Differential Revision: D25507406 fbshipit-source-id: f7edcc55485fabeedfe11e9f269eea15a3cc32ad 2021-03-02 20:55:38 +03:00			`parseDirContents(contents),`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`childPath,`
			`entry.ino,`
			`entry.hash,`
			`0,`
			`callbacks);`
			`}`
			`}`
			`}`
			`}`
			`}`
			`}`

			`void traverseObservedInodes(`
			`const TreeInode& root,`
			`RelativePathPiece rootPath,`
			`TraversalCallbacks& callbacks) {`
			`auto* overlay = root.getMount()->getOverlay();`

			`std::vector<ChildEntry> children;`
separate out ObjectId [proxy hash removal 1/n] Summary: The goal of this stack is to remove Proxy Hash type, but to achieve that we need first to address some tech debt in Eden codebase. For the long time EdenFs had single Hash type that was used for many different use cases. One of major uses for Hash type is identifies internal EdenFs objects such as blobs, trees, and others. We seem to reach agreement that we need a different type for those identifiers, so we introduce separate ObjectId type in this diff to denote new identifier type and replace _some_ usage of Hash with ObjectId. We still retain original Hash type for other use cases. Roughly speaking, this is how this diff separates between Hash and ObjectId: ObjectId: * Everything that is stored in local store(blobs, trees, commits) Hash20: * Explicit hashes(Sha1 of the blob) * Hg identifiers: manifest id and blob hg ig For now, in this diff ObjectId has exactly same content as Hash, but this will change in the future diffs. Doing this way allows to keep diff size manageable, while migrating to new ObjectId right away would produce insanely large diff that would be both hard to make and review. There are few more things that needs to be done before we can get to the meat of removing proxy hashes: 1) Replace include Hash.h with ObjectId.h where needed 2) Remove Hash type, explicitly rename rest of Hash usages to Hash20 3) Modify content of ObjectId to support new use cases 4) Modify serialized metadata and possibly other places that assume ObjectId size is fixed and equal to Hash20 size Reviewed By: chadaustin Differential Revision: D31316477 fbshipit-source-id: 0d5e4460a461bcaac6b9fd884517e129aeaf4baf 2021-10-01 20:24:21 +03:00			`std::optional<ObjectId> hash;`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`{`
			`auto contents = root.getContents().rlock();`
			`children = parseDirContents(contents->entries);`
			`hash = contents->treeHash;`
			`}`

			`traverseTreeInodeChildren(`
			`overlay,`
			`children,`
			`rootPath,`
			`root.getNodeId(),`
			`hash,`
inodes: rename FUSE refcount to fs refcount Summary: One of the issue that EdenFS on Windows is currently facing is around invalidation during an update. In effect, EdenFS is over invalidating, which causes update to be slower than it should be, as well as EdenFS recursively triggering ProjectedFS callbacks during invalidation. Both of these are a sub-par UX. The reason this issue exist is multi-faceted. First, the update code follows the "kPreciseInodeNumberMemory" path which enforces that a directory that is present in the overlay needs to be invalidated, even if it isn't materialized. The second reason is that no reclamation is done for the overlay, combine the two and you get an update that gets both slower over time and will issue significantly more invalidation that is needed. Solving this is a bit involved. We could for instance start by reclaiming inodes from the overlay, but this wouldn't be effective as we use the fact that an inode is present in the overlay as a way to know that the file is cached in the overlay. If we reclaim from the overlay we simply won't be invalidating enough and some files will be out of date. It turns out that we already have a mechanism to track what is cached by the kernel: the fuse refcount. On Linux/macOS, everytime an inode is returned to the kernel, this refcount incremented, and the kernel then notifies us when it forgot about it, at which point the refcount can be decremented. On Windows, the rules are a bit different, and a simple flag is sufficient: set when we write a placeholder on disk (either during a directory listing, or when ProjectedFS asks for it), and unset at invalidation time during update. There is however a small snag in this plan. On Linux, the refcount starts at 0 when EdenFS starts as a mount/unmount will clear all the kernel references on the inodes. 
On Windows, the placeholder aren't disappearing when EdenFS dies or is stopped, so we need a way to scan the working copy when EdenFS starts to know which inodes should be loaded (an UnloadedInode really). The astute reader will have noticed that this last part is effectively a O(materialized) operation that needs to happen at startup, which would be fairly expensive in itself. It turns out that we really don't have choice and we need to do it regardless due to Windows not disallowing writes to the working copy when EdenFS is stopped, and thus for EdenFS to be aware of the actual state of the working copy, it needs to scan it at startup... The first step in doing all of this is to simply rename the various places that uses "fuse refcount" to "fs refcount" which is what this diff does. Reviewed By: chadaustin Differential Revision: D24716801 fbshipit-source-id: e9e6ccff14c454e9f2626fab23daeb3930554b1a 2020-11-05 04:32:07 +03:00			`root.debugGetFsRefcount(),`
add observed inode traversal functions Summary: Add a function that makes it easy to traverse EdenFS's view of the traversed-so-far filesystem. Reviewed By: xavierd Differential Revision: D24299962 fbshipit-source-id: 275c849846bf45a2e80780411d60266b961d825b 2020-10-23 21:13:13 +03:00			`callbacks);`
			`}`

			`} // namespace facebook::eden`