From 4ff8d15d1813fd7fa1c8a1c78598442099616889 Mon Sep 17 00:00:00 2001 From: Adam Simpkins Date: Thu, 14 Jun 2018 21:32:44 -0700 Subject: [PATCH] move HgProxyHash to its own top-level file Summary: Move the HgProxyHash code out of HgImporter.cpp and into its own top-level file. This does not have any functional changes other than moving the code around. This will make it easier to perform HgProxyHash operations from inside HgBackingStore.cpp. Currently all of the HgProxyHash operations are done from inside HgImporter threads. This work does not need to be done in the importer threads, however, and for operations like putBatch() that complete asynchronously it generally should not be done from the HgImporter threads. Reviewed By: wez Differential Revision: D8438776 fbshipit-source-id: 344652f47e5ccdc6ef20b143dc52d0eeac2886e6 --- eden/fs/store/hg/HgImporter.cpp | 226 +------------------------------ eden/fs/store/hg/HgProxyHash.cpp | 169 +++++++++++++++++++++++ eden/fs/store/hg/HgProxyHash.h | 142 +++++++++++++++++++ 3 files changed, 312 insertions(+), 225 deletions(-) create mode 100644 eden/fs/store/hg/HgProxyHash.cpp create mode 100644 eden/fs/store/hg/HgProxyHash.h diff --git a/eden/fs/store/hg/HgImporter.cpp b/eden/fs/store/hg/HgImporter.cpp index 2cf9c2a8dd..e6e7a76ee3 100644 --- a/eden/fs/store/hg/HgImporter.cpp +++ b/eden/fs/store/hg/HgImporter.cpp @@ -30,9 +30,9 @@ #include "eden/fs/model/Tree.h" #include "eden/fs/model/TreeEntry.h" #include "eden/fs/store/LocalStore.h" -#include "eden/fs/store/StoreResult.h" #include "eden/fs/store/hg/HgImportPyError.h" #include "eden/fs/store/hg/HgManifestImporter.h" +#include "eden/fs/store/hg/HgProxyHash.h" #include "eden/fs/utils/PathFuncs.h" #include "eden/fs/utils/TimeUtil.h" @@ -109,230 +109,6 @@ using namespace facebook::eden; */ constexpr int HELPER_PIPE_FD = 5; -/** - * HgProxyHash manages mercurial (path, revHash) data in the LocalStore. 
- * - * Mercurial doesn't really have a blob hash the same way eden and git do. - * Instead, mercurial file revision hashes are always relative to a specific - * path. To use the data in eden, we need to create a blob hash that we can - * use instead. - * - * To do so, we hash the (path, revHash) tuple, and use this hash as the blob - * hash in eden. We store the eden_blob_hash --> (path, hgRevHash) mapping - * in the LocalStore. The HgProxyHash class helps store and retrieve these - * mappings. - */ -struct HgProxyHash { - public: - /** - * Load HgProxyHash data for the given eden blob hash from the LocalStore. - */ - HgProxyHash( - LocalStore* store, - Hash edenBlobHash, - folly::StringPiece context) { - // Read the path name and file rev hash - auto infoResult = store->get(KeySpace::HgProxyHashFamily, edenBlobHash); - if (!infoResult.isValid()) { - XLOG(ERR) << "received unknown mercurial proxy hash " - << edenBlobHash.toString() << " in " << context; - // Fall through and let infoResult.extractValue() throw - } - - value_ = infoResult.extractValue(); - parseValue(edenBlobHash); - } - - ~HgProxyHash() {} - - const RelativePathPiece& path() const { - return path_; - } - - const Hash& revHash() const { - return revHash_; - } - - static folly::Future>> getBatch( - LocalStore* store, - const std::vector& blobHashes) { - auto hashCopies = std::make_shared>(blobHashes); - std::vector byteRanges; - for (auto& hash : *hashCopies) { - byteRanges.push_back(hash.getBytes()); - } - return store->getBatch(KeySpace::HgProxyHashFamily, byteRanges) - .then([blobHashes = hashCopies](std::vector&& data) { - std::vector> results; - - for (size_t i = 0; i < blobHashes->size(); ++i) { - HgProxyHash hgInfo( - blobHashes->at(i), data[i], "prefetchFiles getBatch"); - - results.emplace_back(hgInfo.path().copy(), hgInfo.revHash()); - } - - return results; - }); - } - - /** - * Store HgProxyHash data in the LocalStore. 
- * - * Returns an eden blob hash that can be used to retrieve the data later - * (using the HgProxyHash constructor defined above). - */ - static Hash store( - RelativePathPiece path, - Hash hgRevHash, - LocalStore::WriteBatch* writeBatch) { - auto computedPair = prepareToStore(path, hgRevHash); - HgProxyHash::store(computedPair, writeBatch); - return computedPair.first; - } - - /** - * Compute the proxy hash information, but do not store it. - * - * This is useful when you need the proxy hash but don't want to commit - * the data until after you have written an associated data item. - * Returns the proxy hash and the data that should be written; - * the caller is responsible for passing the pair to the HgProxyHash::store() - * method below at the appropriate time. - */ - static std::pair prepareToStore( - RelativePathPiece path, - Hash hgRevHash) { - // Serialize the (path, hgRevHash) tuple into a buffer. - auto buf = serialize(path, hgRevHash); - - // Compute the hash of the serialized buffer - ByteRange serializedInfo = buf.coalesce(); - auto edenBlobHash = Hash::sha1(serializedInfo); - - return std::make_pair(edenBlobHash, std::move(buf)); - } - - /** - * Store precomputed proxy hash information. - * Stores the data computed by prepareToStore(). - */ - static void store( - const std::pair& computedPair, - LocalStore::WriteBatch* writeBatch) { - writeBatch->put( - KeySpace::HgProxyHashFamily, - computedPair.first, - // Note that this depends on prepareToStore() having called - // buf.coalesce()! - ByteRange(computedPair.second.data(), computedPair.second.length())); - } - - private: - // Not movable or copyable. - // path_ points into value_, and would need to be updated after - // copying/moving the data. Since no-one needs to copy or move HgProxyHash - // objects, we don't implement this for now. 
- HgProxyHash(const HgProxyHash&) = delete; - HgProxyHash& operator=(const HgProxyHash&) = delete; - HgProxyHash(HgProxyHash&&) = delete; - HgProxyHash& operator=(HgProxyHash&&) = delete; - - HgProxyHash( - Hash edenBlobHash, - StoreResult& infoResult, - folly::StringPiece context) { - if (!infoResult.isValid()) { - XLOG(ERR) << "received unknown mercurial proxy hash " - << edenBlobHash.toString() << " in " << context; - // Fall through and let infoResult.extractValue() throw - } - - value_ = infoResult.extractValue(); - parseValue(edenBlobHash); - } - - /** - * Serialize the (path, hgRevHash) data into a buffer that will be stored in - * the LocalStore. - */ - static IOBuf serialize(RelativePathPiece path, Hash hgRevHash) { - // We serialize the data as <hash_bytes><path_length><path> - // - // The path_length is stored as a big-endian uint32_t. - auto pathStr = path.stringPiece(); - IOBuf buf( - IOBuf::CREATE, Hash::RAW_SIZE + sizeof(uint32_t) + pathStr.size()); - Appender appender(&buf, 0); - appender.push(hgRevHash.getBytes()); - appender.writeBE(pathStr.size()); - appender.push(pathStr); - - return buf; - } - - /** - * Parse the serialized data found in value_, and set revHash_ and path_. - * - * The value_ member variable should already contain the serialized data, - * (as returned by serialize()). - * - * Note that path_ will be set to a RelativePathPiece pointing into the - * string data owned by value_. (This lets us avoid copying the string data - * out.) 
- */ - void parseValue(Hash edenBlobHash) { - ByteRange infoBytes = StringPiece(value_); - // Make sure the data is long enough to contain the rev hash and path length - if (infoBytes.size() < Hash::RAW_SIZE + sizeof(uint32_t)) { - auto msg = folly::to( - "mercurial blob info data for ", - edenBlobHash.toString(), - " is too short (", - infoBytes.size(), - " bytes)"); - XLOG(ERR) << msg; - throw std::length_error(msg); - } - - // Extract the revHash_ - revHash_ = Hash(infoBytes.subpiece(0, Hash::RAW_SIZE)); - infoBytes.advance(Hash::RAW_SIZE); - - // Extract the path length - uint32_t pathLength; - memcpy(&pathLength, infoBytes.data(), sizeof(uint32_t)); - pathLength = Endian::big(pathLength); - infoBytes.advance(sizeof(uint32_t)); - // Make sure the path length agrees with the length of data remaining - if (infoBytes.size() != pathLength) { - auto msg = folly::to( - "mercurial blob info data for ", - edenBlobHash.toString(), - " has inconsistent path length"); - XLOG(ERR) << msg; - throw std::length_error(msg); - } - - // Extract the path_ - path_ = RelativePathPiece(StringPiece(infoBytes)); - } - - /** - * The serialized data. - */ - std::string value_; - /** - * The revision hash. - */ - Hash revHash_; - /** - * The path name. Note that this points into the serialized value_ data. - * path_ itself does not own the data it points to. - */ - RelativePathPiece path_; -}; - /** * Internal helper function for use by getImportHelperPath(). * diff --git a/eden/fs/store/hg/HgProxyHash.cpp b/eden/fs/store/hg/HgProxyHash.cpp new file mode 100644 index 0000000000..5eb9b0a12b --- /dev/null +++ b/eden/fs/store/hg/HgProxyHash.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ * + */ +#include "eden/fs/store/hg/HgProxyHash.h" + +#include +#include +#include +#include + +#include "eden/fs/store/LocalStore.h" +#include "eden/fs/store/StoreResult.h" + +using folly::ByteRange; +using folly::Endian; +using folly::IOBuf; +using folly::StringPiece; +using folly::io::Appender; +using std::string; +using KeySpace = facebook::eden::LocalStore::KeySpace; + +namespace facebook { +namespace eden { + +HgProxyHash::HgProxyHash( + LocalStore* store, + Hash edenBlobHash, + StringPiece context) { + // Read the path name and file rev hash + auto infoResult = store->get(KeySpace::HgProxyHashFamily, edenBlobHash); + if (!infoResult.isValid()) { + XLOG(ERR) << "received unknown mercurial proxy hash " + << edenBlobHash.toString() << " in " << context; + // Fall through and let infoResult.extractValue() throw + } + + value_ = infoResult.extractValue(); + parseValue(edenBlobHash); +} + +folly::Future>> HgProxyHash::getBatch( + LocalStore* store, + const std::vector& blobHashes) { + auto hashCopies = std::make_shared>(blobHashes); + std::vector byteRanges; + for (auto& hash : *hashCopies) { + byteRanges.push_back(hash.getBytes()); + } + return store->getBatch(KeySpace::HgProxyHashFamily, byteRanges) + .then([blobHashes = hashCopies](std::vector&& data) { + std::vector> results; + + for (size_t i = 0; i < blobHashes->size(); ++i) { + HgProxyHash hgInfo( + blobHashes->at(i), data[i], "prefetchFiles getBatch"); + + results.emplace_back(hgInfo.path().copy(), hgInfo.revHash()); + } + + return results; + }); +} + +Hash HgProxyHash::store( + RelativePathPiece path, + Hash hgRevHash, + LocalStore::WriteBatch* writeBatch) { + auto computedPair = prepareToStore(path, hgRevHash); + HgProxyHash::store(computedPair, writeBatch); + return computedPair.first; +} + +std::pair HgProxyHash::prepareToStore( + RelativePathPiece path, + Hash hgRevHash) { + // Serialize the (path, hgRevHash) tuple into a buffer. 
+ auto buf = serialize(path, hgRevHash); + + // Compute the hash of the serialized buffer + ByteRange serializedInfo = buf.coalesce(); + auto edenBlobHash = Hash::sha1(serializedInfo); + + return std::make_pair(edenBlobHash, std::move(buf)); +} + +void HgProxyHash::store( + const std::pair& computedPair, + LocalStore::WriteBatch* writeBatch) { + writeBatch->put( + KeySpace::HgProxyHashFamily, + computedPair.first, + // Note that this depends on prepareToStore() having called + // buf.coalesce()! + ByteRange(computedPair.second.data(), computedPair.second.length())); +} + +HgProxyHash::HgProxyHash( + Hash edenBlobHash, + StoreResult& infoResult, + StringPiece context) { + if (!infoResult.isValid()) { + XLOG(ERR) << "received unknown mercurial proxy hash " + << edenBlobHash.toString() << " in " << context; + // Fall through and let infoResult.extractValue() throw + } + + value_ = infoResult.extractValue(); + parseValue(edenBlobHash); +} + +IOBuf HgProxyHash::serialize(RelativePathPiece path, Hash hgRevHash) { + // We serialize the data as <hash_bytes><path_length><path> + // + // The path_length is stored as a big-endian uint32_t. 
+ auto pathStr = path.stringPiece(); + IOBuf buf(IOBuf::CREATE, Hash::RAW_SIZE + sizeof(uint32_t) + pathStr.size()); + Appender appender(&buf, 0); + appender.push(hgRevHash.getBytes()); + appender.writeBE(pathStr.size()); + appender.push(pathStr); + + return buf; +} + +void HgProxyHash::parseValue(Hash edenBlobHash) { + ByteRange infoBytes = StringPiece(value_); + // Make sure the data is long enough to contain the rev hash and path length + if (infoBytes.size() < Hash::RAW_SIZE + sizeof(uint32_t)) { + auto msg = folly::to( + "mercurial blob info data for ", + edenBlobHash.toString(), + " is too short (", + infoBytes.size(), + " bytes)"); + XLOG(ERR) << msg; + throw std::length_error(msg); + } + + // Extract the revHash_ + revHash_ = Hash(infoBytes.subpiece(0, Hash::RAW_SIZE)); + infoBytes.advance(Hash::RAW_SIZE); + + // Extract the path length + uint32_t pathLength; + memcpy(&pathLength, infoBytes.data(), sizeof(uint32_t)); + pathLength = Endian::big(pathLength); + infoBytes.advance(sizeof(uint32_t)); + // Make sure the path length agrees with the length of data remaining + if (infoBytes.size() != pathLength) { + auto msg = folly::to( + "mercurial blob info data for ", + edenBlobHash.toString(), + " has inconsistent path length"); + XLOG(ERR) << msg; + throw std::length_error(msg); + } + + // Extract the path_ + path_ = RelativePathPiece(StringPiece(infoBytes)); +} + +} // namespace eden +} // namespace facebook diff --git a/eden/fs/store/hg/HgProxyHash.h b/eden/fs/store/hg/HgProxyHash.h new file mode 100644 index 0000000000..0037d68bec --- /dev/null +++ b/eden/fs/store/hg/HgProxyHash.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ * + */ +#pragma once + +#include +#include +#include "eden/fs/model/Hash.h" +#include "eden/fs/store/LocalStore.h" +#include "eden/fs/utils/PathFuncs.h" + +namespace folly { +template +class Future; +class IOBuf; +} // namespace folly + +namespace facebook { +namespace eden { + +/** + * HgProxyHash manages mercurial (path, revHash) data in the LocalStore. + * + * Mercurial doesn't really have a blob hash the same way eden and git do. + * Instead, mercurial file revision hashes are always relative to a specific + * path. To use the data in eden, we need to create a blob hash that we can + * use instead. + * + * To do so, we hash the (path, revHash) tuple, and use this hash as the blob + * hash in eden. We store the eden_blob_hash --> (path, hgRevHash) mapping + * in the LocalStore. The HgProxyHash class helps store and retrieve these + * mappings. + */ +class HgProxyHash { + public: + /** + * Load HgProxyHash data for the given eden blob hash from the LocalStore. + */ + HgProxyHash(LocalStore* store, Hash edenBlobHash, folly::StringPiece context); + + ~HgProxyHash() {} + + const RelativePathPiece& path() const { + return path_; + } + + const Hash& revHash() const { + return revHash_; + } + + static folly::Future>> getBatch( + LocalStore* store, + const std::vector& blobHashes); + + /** + * Store HgProxyHash data in the LocalStore. + * + * Returns an eden blob hash that can be used to retrieve the data later + * (using the HgProxyHash constructor defined above). + */ + static Hash store( + RelativePathPiece path, + Hash hgRevHash, + LocalStore::WriteBatch* writeBatch); + + /** + * Compute the proxy hash information, but do not store it. + * + * This is useful when you need the proxy hash but don't want to commit + * the data until after you have written an associated data item. + * Returns the proxy hash and the data that should be written; + * the caller is responsible for passing the pair to the HgProxyHash::store() + * method below at the appropriate time. 
+ */ + static std::pair prepareToStore( + RelativePathPiece path, + Hash hgRevHash); + + /** + * Store precomputed proxy hash information. + * Stores the data computed by prepareToStore(). + */ + static void store( + const std::pair& computedPair, + LocalStore::WriteBatch* writeBatch); + + private: + // Not movable or copyable. + // path_ points into value_, and would need to be updated after + // copying/moving the data. Since no-one needs to copy or move HgProxyHash + // objects, we don't implement this for now. + HgProxyHash(const HgProxyHash&) = delete; + HgProxyHash& operator=(const HgProxyHash&) = delete; + HgProxyHash(HgProxyHash&&) = delete; + HgProxyHash& operator=(HgProxyHash&&) = delete; + + HgProxyHash( + Hash edenBlobHash, + StoreResult& infoResult, + folly::StringPiece context); + + /** + * Serialize the (path, hgRevHash) data into a buffer that will be stored in + * the LocalStore. + */ + static folly::IOBuf serialize(RelativePathPiece path, Hash hgRevHash); + + /** + * Parse the serialized data found in value_, and set revHash_ and path_. + * + * The value_ member variable should already contain the serialized data, + * (as returned by serialize()). + * + * Note that path_ will be set to a RelativePathPiece pointing into the + * string data owned by value_. (This lets us avoid copying the string data + * out.) + */ + void parseValue(Hash edenBlobHash); + + /** + * The serialized data as written in the LocalStore. + */ + std::string value_; + /** + * The revision hash. + */ + Hash revHash_; + /** + * The path name. Note that this points into the serialized value_ data. + * path_ itself does not own the data it points to. + */ + RelativePathPiece path_; +}; + +} // namespace eden +} // namespace facebook