diff --git a/eden/fs/store/hg/HgBackingStore.cpp b/eden/fs/store/hg/HgBackingStore.cpp index de8e2cfc24..bf9cc16500 100644 --- a/eden/fs/store/hg/HgBackingStore.cpp +++ b/eden/fs/store/hg/HgBackingStore.cpp @@ -34,12 +34,8 @@ HgBackingStore::HgBackingStore(StringPiece repository, LocalStore* localStore) HgBackingStore::~HgBackingStore() {} Future> HgBackingStore::getTree(const Hash& id) { - // HgBackingStore imports all relevant Tree objects when the root Tree is - // imported by getTreeForCommit(). We should never have a case where - // we are asked for a Tree that hasn't already been imported. - LOG(ERROR) << "HgBackingStore asked for unknown tree ID " << id.toString(); - return makeFuture>(std::domain_error( - "HgBackingStore asked for unknown tree ID " + id.toString())); + return folly::makeFutureWith( + [&id, this] { return importer_->importTree(id); }); } Future> HgBackingStore::getBlob(const Hash& id) { diff --git a/eden/fs/store/hg/HgImporter.cpp b/eden/fs/store/hg/HgImporter.cpp index 2f2c9b350f..89a889d57d 100644 --- a/eden/fs/store/hg/HgImporter.cpp +++ b/eden/fs/store/hg/HgImporter.cpp @@ -23,11 +23,15 @@ #include #include "HgManifestImporter.h" +#include "eden/fs/model/Tree.h" #include "eden/fs/model/TreeEntry.h" #include "eden/fs/store/LocalStore.h" #include "eden/fs/store/StoreResult.h" #include "eden/fs/utils/PathFuncs.h" +#include "eden/hg/datastorage/cstore/uniondatapackstore.h" +#include "eden/hg/datastorage/ctreemanifest/treemanifest.h" + using folly::ByteRange; using folly::Endian; using folly::io::Appender; @@ -63,7 +67,7 @@ using namespace facebook::eden; constexpr int HELPER_PIPE_FD = 5; /** - * HgBlobInfo manages mercurial (path, revHash) data in the LocalStore. + * HgProxyHash manages mercurial (path, revHash) data in the LocalStore. * * Mercurial doesn't really have a blob hash the same way eden and git do. 
* Instead, mercurial file revision hashes are always relative to a specific @@ -72,19 +76,19 @@ constexpr int HELPER_PIPE_FD = 5; * * To do so, we hash the (path, revHash) tuple, and use this hash as the blob * hash in eden. We store the eden_blob_hash --> (path, hgRevHash) mapping - * in the LocalStore. The HgBlobInfo class helps store and retrieve these + * in the LocalStore. The HgProxyHash class helps store and retrieve these * mappings. */ -struct HgBlobInfo { +struct HgProxyHash { public: /** - * Load HgBlobInfo data for the given eden blob hash from the LocalStore. + * Load HgProxyHash data for the given eden blob hash from the LocalStore. */ - HgBlobInfo(LocalStore* store, Hash edenBlobHash) { + HgProxyHash(LocalStore* store, Hash edenBlobHash) { // Read the path name and file rev hash auto infoResult = store->get(StringPiece(getBlobKey(edenBlobHash))); if (!infoResult.isValid()) { - LOG(ERROR) << "received unknown mercurial blob hash " + LOG(ERROR) << "received unknown mercurial proxy hash " << edenBlobHash.toString(); // Fall through and let infoResult.extractValue() throw } @@ -93,7 +97,7 @@ struct HgBlobInfo { parseValue(edenBlobHash); } - ~HgBlobInfo() {} + ~HgProxyHash() {} const RelativePathPiece& path() const { return path_; @@ -104,12 +108,29 @@ struct HgBlobInfo { } /** - * Store HgBlobInfo data in the LocalStore. + * Store HgProxyHash data in the LocalStore. * * Returns an eden blob hash that can be used to retrieve the data later - * (using the HgBlobInfo constructor defined above). + * (using the HgProxyHash constructor defined above). */ static Hash store(LocalStore* store, RelativePathPiece path, Hash hgRevHash) { + auto computedPair = prepareToStore(path, hgRevHash); + HgProxyHash::store(store, computedPair); + return computedPair.first; + } + + /** + * Compute the proxy hash information, but do not store it. 
+ * + * This is useful when you need the proxy hash but don't want to commit + * the data until after you have written an associated data item. + * Returns the proxy hash and the data that should be written; + * the caller is responsible for passing the pair to the HgProxyHash::store() + * method below at the appropriate time. + */ + static std::pair prepareToStore( + RelativePathPiece path, + Hash hgRevHash) { // Serialize the (path, hgRevHash) tuple into a buffer. auto buf = serialize(path, hgRevHash); @@ -117,20 +138,32 @@ struct HgBlobInfo { ByteRange serializedInfo = buf.coalesce(); auto edenBlobHash = Hash::sha1(serializedInfo); - // Save the data in the store - store->put(StringPiece(getBlobKey(edenBlobHash)), serializedInfo); - return edenBlobHash; + return std::make_pair(edenBlobHash, std::move(buf)); + } + + /** + * Store precomputed proxy hash information. + * Stores the data computed by prepareToStore(). + */ + static void store( + LocalStore* store, + const std::pair& computedPair) { + store->put( + StringPiece(getBlobKey(computedPair.first)), + // Note that this depends on prepareToStore() having called + // buf.coalesce()! + ByteRange(computedPair.second.data(), computedPair.second.length())); } private: // Not movable or copyable. // path_ points into value_, and would need to be updated after - // copying/moving the data. Since no-one needs to copy or move HgBlobInfo + // copying/moving the data. Since no-one needs to copy or move HgProxyHash // objects, we don't implement this for now. 
- HgBlobInfo(const HgBlobInfo&) = delete; - HgBlobInfo& operator=(const HgBlobInfo&) = delete; - HgBlobInfo(HgBlobInfo&&) = delete; - HgBlobInfo& operator=(HgBlobInfo&&) = delete; + HgProxyHash(const HgProxyHash&) = delete; + HgProxyHash& operator=(const HgProxyHash&) = delete; + HgProxyHash(HgProxyHash&&) = delete; + HgProxyHash& operator=(HgProxyHash&&) = delete; static std::string getBlobKey(Hash edenBlobHash) { // TODO: Use a RocksDB column family for this rather than having to @@ -342,6 +375,20 @@ HgImporter::HgImporter(StringPiece repoPath, LocalStore* store) throw std::runtime_error( "unexpected start message from hg_import_helper script"); } + + dataPackStores_.emplace_back(std::make_unique( + folly::to(repoPath, "/.hg/store/packs/manifests"))); + + auto hgCachePath = getCachePath(); + if (!hgCachePath.empty()) { + dataPackStores_.emplace_back(std::make_unique(hgCachePath)); + } + + std::vector storePtrs; + for (auto& store : dataPackStores_) { + storePtrs.emplace_back(store.get()); + } + unionStore_ = std::make_unique(storePtrs); } HgImporter::~HgImporter() { @@ -349,49 +396,155 @@ HgImporter::~HgImporter() { helper_.wait(); } -Hash HgImporter::importManifest(StringPiece revName) { - // Send the manifest request to the helper process - sendManifestRequest(revName); +std::unique_ptr HgImporter::importTree(const Hash& edenBlobHash) { + HgProxyHash pathInfo(store_, edenBlobHash); + return importTreeImpl( + pathInfo.revHash(), // this is really the manifest node + edenBlobHash, + pathInfo.path()); +} - HgManifestImporter importer(store_); - size_t numPaths = 0; +std::unique_ptr HgImporter::importTreeImpl( + const Hash& manifestNode, + const Hash& edenBlobHash, + RelativePathPiece path) { + auto content = unionStore_->get( + Key(path.stringPiece().data(), + path.stringPiece().size(), + (const char*)manifestNode.getBytes().data(), + manifestNode.getBytes().size())); - IOBuf chunkData; - while (true) { - // Read the chunk header - auto header = readChunkHeader(); 
- - // Allocate a larger chunk buffer if we need to, - // but prefer to re-use the old buffer if we can. - if (header.dataLength > chunkData.capacity()) { - chunkData = IOBuf(IOBuf::CREATE, header.dataLength); - } else { - chunkData.clear(); - } - folly::readFull(helperOut_, chunkData.writableTail(), header.dataLength); - chunkData.append(header.dataLength); - - // Now process the entries in the chunk - Cursor cursor(&chunkData); - while (!cursor.isAtEnd()) { - readManifestEntry(importer, cursor); - ++numPaths; - } - - if ((header.flags & FLAG_MORE_CHUNKS) == 0) { - break; - } + if (!content.content()) { + throw std::domain_error(folly::to( + "HgImporter::importTree asked for unknown tree ", + path, + ", ID ", + manifestNode.toString())); } - auto rootHash = importer.finish(); - VLOG(1) << "processed " << numPaths << " manifest paths"; - return rootHash; + Manifest manifest(content); + std::vector entries; + + auto iter = manifest.getIterator(); + while (!iter.isfinished()) { + auto* entry = iter.currentvalue(); + + // The node is the hex string representation of the hash, but + // it is not NUL terminated! 
+ StringPiece node(entry->node, 40); + Hash entryHash(node); + + StringPiece entryName(entry->filename, entry->filenamelen); + + FileType fileType; + uint8_t ownerPermissions; + + VLOG(10) << "tree: " << manifestNode << " " << entryName + << " node: " << node << " flag: " << entry->flag; + + if (entry->isdirectory()) { + fileType = FileType::DIRECTORY; + ownerPermissions = 0b110; + } else if (entry->flag) { + switch (*entry->flag) { + case 'x': + fileType = FileType::REGULAR_FILE; + ownerPermissions = 0b111; + break; + case 'l': + fileType = FileType::SYMLINK; + ownerPermissions = 0b111; + break; + default: + throw std::runtime_error(folly::to( + "unsupported file flags for ", + path, + "/", + entryName, + ": ", + entry->flag)); + } + } else { + fileType = FileType::REGULAR_FILE; + ownerPermissions = 0b110; + } + + auto proxyHash = HgProxyHash::store( + store_, path + RelativePathPiece(entryName), entryHash); + + entries.emplace_back(proxyHash, entryName, fileType, ownerPermissions); + + iter.next(); + } + + auto tree = std::make_unique(std::move(entries), manifestNode); + auto serialized = store_->serializeTree(tree.get()); + store_->put(manifestNode, serialized.second.coalesce()); + return tree; +} + +Hash HgImporter::importManifest(StringPiece revName) { + try { + auto manifestNode = resolveManifestNode(revName); + LOG(ERROR) << "revision " << revName << " has manifest node " + << manifestNode; + + // Record that we are at the root for this node + RelativePathPiece path{}; + auto proxyInfo = HgProxyHash::prepareToStore(path, manifestNode); + auto tree = importTreeImpl(manifestNode, proxyInfo.first, path); + // Only write the proxy hash value for this once we've imported + // the root. + HgProxyHash::store(store_, proxyInfo); + + return tree->getHash(); + } catch (const MissingKeyError&) { + // We don't have a tree manifest available for the target rev, + // so let's fall back to the full flat manifest importer. 
+ + // Send the manifest request to the helper process + sendManifestRequest(revName); + + HgManifestImporter importer(store_); + size_t numPaths = 0; + + IOBuf chunkData; + while (true) { + // Read the chunk header + auto header = readChunkHeader(); + + // Allocate a larger chunk buffer if we need to, + // but prefer to re-use the old buffer if we can. + if (header.dataLength > chunkData.capacity()) { + chunkData = IOBuf(IOBuf::CREATE, header.dataLength); + } else { + chunkData.clear(); + } + folly::readFull(helperOut_, chunkData.writableTail(), header.dataLength); + chunkData.append(header.dataLength); + + // Now process the entries in the chunk + Cursor cursor(&chunkData); + while (!cursor.isAtEnd()) { + readManifestEntry(importer, cursor); + ++numPaths; + } + + if ((header.flags & FLAG_MORE_CHUNKS) == 0) { + break; + } + } + auto rootHash = importer.finish(); + VLOG(1) << "processed " << numPaths << " manifest paths"; + + return rootHash; + } } IOBuf HgImporter::importFileContents(Hash blobHash) { // Look up the mercurial path and file revision hash, // which we need to import the data from mercurial - HgBlobInfo hgInfo(store_, blobHash); + HgProxyHash hgInfo(store_, blobHash); VLOG(5) << "requesting file contents of '" << hgInfo.path() << "', " << hgInfo.revHash().toString(); @@ -412,6 +565,23 @@ IOBuf HgImporter::importFileContents(Hash blobHash) { return buf; } +Hash HgImporter::resolveManifestNode(folly::StringPiece revName) { + sendManifestNodeRequest(revName); + + auto header = readChunkHeader(); + if (header.dataLength != 20) { + throw std::runtime_error(folly::to( + "expected a 20-byte hash for the manifest node, " + "but got data of length ", + header.dataLength)); + } + + Hash::Storage buffer; + folly::readFull(helperOut_, &buffer[0], buffer.size()); + + return Hash(buffer); +} + void HgImporter::readManifestEntry( HgManifestImporter& importer, folly::io::Cursor& cursor) { @@ -456,13 +626,22 @@ void HgImporter::readManifestEntry( RelativePathPiece 
path(pathStr); // Generate a blob hash from the mercurial (path, fileRev) information - auto blobHash = HgBlobInfo::store(store_, path, fileRevHash); + auto blobHash = HgProxyHash::store(store_, path, fileRevHash); auto entry = TreeEntry(blobHash, path.basename().value(), fileType, ownerPermissions); importer.processEntry(path.dirname(), std::move(entry)); } +std::string HgImporter::getCachePath() { + sendGetCachePathRequest(); + auto header = readChunkHeader(); + std::string result; + result.resize(header.dataLength); + folly::readFull(helperOut_, &result[0], header.dataLength); + return result; +} + HgImporter::ChunkHeader HgImporter::readChunkHeader() { ChunkHeader header; folly::readFull(helperOut_, &header, sizeof(header)); @@ -499,6 +678,21 @@ void HgImporter::sendManifestRequest(folly::StringPiece revName) { folly::writevFull(helperIn_, iov.data(), iov.size()); } +void HgImporter::sendManifestNodeRequest(folly::StringPiece revName) { + ChunkHeader header; + header.command = Endian::big(CMD_MANIFEST_NODE_FOR_COMMIT); + header.requestID = Endian::big(nextRequestID_++); + header.flags = 0; + header.dataLength = Endian::big(revName.size()); + + std::array iov; + iov[0].iov_base = &header; + iov[0].iov_len = sizeof(header); + iov[1].iov_base = const_cast(revName.data()); + iov[1].iov_len = revName.size(); + folly::writevFull(helperIn_, iov.data(), iov.size()); +} + void HgImporter::sendFileRequest(RelativePathPiece path, Hash revHash) { ChunkHeader header; header.command = Endian::big(CMD_CAT_FILE); @@ -516,5 +710,18 @@ void HgImporter::sendFileRequest(RelativePathPiece path, Hash revHash) { iov[2].iov_len = pathStr.size(); folly::writevFull(helperIn_, iov.data(), iov.size()); } + +void HgImporter::sendGetCachePathRequest() { + ChunkHeader header; + header.command = Endian::big(CMD_GET_CACHE_PATH); + header.requestID = Endian::big(nextRequestID_++); + header.flags = 0; + header.dataLength = 0; + + std::array iov; + iov[0].iov_base = &header; + iov[0].iov_len = 
sizeof(header); + folly::writevFull(helperIn_, iov.data(), iov.size()); +} } } // facebook::eden diff --git a/eden/fs/store/hg/HgImporter.h b/eden/fs/store/hg/HgImporter.h index 7c8ce65195..8d7c8c88ad 100644 --- a/eden/fs/store/hg/HgImporter.h +++ b/eden/fs/store/hg/HgImporter.h @@ -21,12 +21,18 @@ class Cursor; } } +/* forward declare support classes from mercurial */ +class DatapackStore; +class UnionDatapackStore; + namespace facebook { namespace eden { class Hash; class HgManifestImporter; class LocalStore; +class StoreResult; +class Tree; /** * HgImporter provides an API for extracting data out of a mercurial @@ -60,6 +66,14 @@ class HgImporter { */ Hash importManifest(folly::StringPiece revName); + /** + * Import the tree with the specified tree manifest hash. + * + * Returns the Tree, or throws on error. + * Requires that tree manifest data be available. + */ + std::unique_ptr importTree(const Hash& edenBlobHash); + /** * Import file information * @@ -70,6 +84,16 @@ class HgImporter { */ folly::IOBuf importFileContents(Hash blobHash); + /** + * Resolve the manifest node for the specified revision. + * + * This is used to locate the mercurial tree manifest data for + * the root tree of a given commit. + * + * Returns a Hash identifying the manifest node for the revision. + */ + Hash resolveManifestNode(folly::StringPiece revName); + private: /** * Chunk header flags. @@ -91,6 +115,8 @@ class HgImporter { CMD_RESPONSE = 1, CMD_MANIFEST = 2, CMD_CAT_FILE = 3, + CMD_MANIFEST_NODE_FOR_COMMIT = 4, + CMD_GET_CACHE_PATH = 5, }; struct ChunkHeader { uint32_t requestID; @@ -131,6 +157,26 @@ class HgImporter { * of the given file at the specified file revision. */ void sendFileRequest(RelativePathPiece path, Hash fileRevHash); + /** + * Send a request to the helper process, asking it to send us the + * manifest node (NOT the full manifest!) for the specified revision. 
+ */ + void sendManifestNodeRequest(folly::StringPiece revName); + /** + * Determine the shared tree manifest pack location associated with + * this repo. + */ + std::string getCachePath(); + /** + * Send a request to the helper process, asking it to send us the + * tree manifest pack location. + */ + void sendGetCachePathRequest(); + + std::unique_ptr importTreeImpl( + const Hash& manifestNode, + const Hash& edenBlobHash, + RelativePathPiece path); folly::Subprocess helper_; LocalStore* store_{nullptr}; @@ -145,6 +191,9 @@ class HgImporter { */ int helperIn_{-1}; int helperOut_{-1}; + + std::vector> dataPackStores_; + std::unique_ptr unionStore_; }; } } // facebook::eden diff --git a/eden/fs/store/hg/TARGETS b/eden/fs/store/hg/TARGETS index 4b1c1e4616..0ee1065c02 100644 --- a/eden/fs/store/hg/TARGETS +++ b/eden/fs/store/hg/TARGETS @@ -13,6 +13,7 @@ cpp_library( '@/eden/fs/model:model', '@/eden/fs/model/git:git', '@/eden/fs/store:store', + '@/eden/hg/datastorage:datapack', '@/folly:folly', '@/folly:subprocess', ], diff --git a/eden/fs/store/hg/hg_import_helper.py b/eden/fs/store/hg/hg_import_helper.py index 9d74fd4cd8..82deae6f25 100755 --- a/eden/fs/store/hg/hg_import_helper.py +++ b/eden/fs/store/hg/hg_import_helper.py @@ -24,6 +24,7 @@ import mercurial.hg import mercurial.node import mercurial.scmutil import mercurial.ui +from remotefilelog import shallowutil, constants # # Message chunk header format. @@ -64,6 +65,8 @@ CMD_STARTED = 0 CMD_RESPONSE = 1 CMD_MANIFEST = 2 CMD_CAT_FILE = 3 +CMD_MANIFEST_NODE_FOR_COMMIT = 4 +CMD_GET_CACHE_PATH = 5 # # Flag values. @@ -269,6 +272,50 @@ class HgServer(object): contents = self.get_file(path, rev_hash) self.send_chunk(request, contents) + @cmd(CMD_MANIFEST_NODE_FOR_COMMIT) + def cmd_manifest_node_for_commit(self, request): + ''' + Handler for CMD_MANIFEST_NODE_FOR_COMMIT requests. + + Given a commit hash, resolve the manifest node. + + Request body format: + - Revision name (string) + This is the mercurial revision ID. 
This can be any string that will + be understood by mercurial to identify a single revision. (For + instance, this might be ".", ".^", a 40-character hexadecimal hash, + or a unique hash prefix, etc.) + + Response body format: + The response body is the manifest node, a 20-byte binary value. + ''' + rev_name = request.body + self.debug('resolving manifest node for revision %r', rev_name) + self.send_chunk(request, self.get_manifest_node(rev_name)) + + @cmd(CMD_GET_CACHE_PATH) + def cmd_get_cache_path(self, request): + ''' + Handler for CMD_GET_CACHE_PATH requests. + + Computes the tree pack cache path for the repo. + + Request body format: no arguments. + + Response body format: + - The path holding tree packs (string) + ''' + if not hasattr(self.repo, 'name'): + # The repo doesn't have the appropriate extensions configured + # to support tree manifests, so return an empty path. + # This happens in our integration test suite. + cache_path = '' + else: + cache_path = shallowutil.getcachepackpath(self.repo, + constants.TREEPACK_CATEGORY) + self.send_chunk(request, cache_path) + + def send_chunk(self, request, data, is_last=True): flags = 0 if not is_last: @@ -335,6 +382,19 @@ class HgServer(object): self.debug('sent manifest with %d paths in %s seconds', num_paths, time.time() - start) + def get_manifest_node(self, rev): + try: + ctx = mercurial.scmutil.revsingle(self.repo, rev) + return ctx.manifestnode() + except Exception: + # The mercurial call may fail with a "no node" error if the + # revision in question was added to the repository after we + # originally opened it. Invalidate the repository and try again, + # in case our cached repo data is just stale. + self.repo.invalidate() + ctx = mercurial.scmutil.revsingle(self.repo, rev) + return ctx.manifestnode() + def get_file(self, path, rev_hash): try: fctx = self.repo.filectx(path, fileid=rev_hash)