move HgProxyHash to its own top-level file

Summary:
Move the HgProxyHash code out of HgImporter.cpp and into its own top-level
file.  This does not have any functional changes other than moving the code
around.

This will make it easier to perform HgProxyHash operations from inside
HgBackingStore.cpp.  Currently all of the HgProxyHash operations are done from
inside HgImporter threads.  These operations do not need to be done in the
importer threads, however, and for operations like putBatch() that complete
asynchronously they generally should not be done from the HgImporter threads.

Reviewed By: wez

Differential Revision: D8438776

fbshipit-source-id: 344652f47e5ccdc6ef20b143dc52d0eeac2886e6
This commit is contained in:
Adam Simpkins 2018-06-14 21:32:44 -07:00 committed by Facebook Github Bot
parent 193e26d73f
commit 4ff8d15d18
3 changed files with 312 additions and 225 deletions

View File

@ -30,9 +30,9 @@
#include "eden/fs/model/Tree.h"
#include "eden/fs/model/TreeEntry.h"
#include "eden/fs/store/LocalStore.h"
#include "eden/fs/store/StoreResult.h"
#include "eden/fs/store/hg/HgImportPyError.h"
#include "eden/fs/store/hg/HgManifestImporter.h"
#include "eden/fs/store/hg/HgProxyHash.h"
#include "eden/fs/utils/PathFuncs.h"
#include "eden/fs/utils/TimeUtil.h"
@ -109,230 +109,6 @@ using namespace facebook::eden;
*/
constexpr int HELPER_PIPE_FD = 5;
/**
* HgProxyHash manages mercurial (path, revHash) data in the LocalStore.
*
* Mercurial doesn't really have a blob hash the same way eden and git do.
* Instead, mercurial file revision hashes are always relative to a specific
* path. To use the data in eden, we need to create a blob hash that we can
* use instead.
*
* To do so, we hash the (path, revHash) tuple, and use this hash as the blob
* hash in eden. We store the eden_blob_hash --> (path, hgRevHash) mapping
* in the LocalStore. The HgProxyHash class helps store and retrieve these
* mappings.
*/
struct HgProxyHash {
public:
/**
* Load HgProxyHash data for the given eden blob hash from the LocalStore.
*/
HgProxyHash(
LocalStore* store,
Hash edenBlobHash,
folly::StringPiece context) {
// Read the path name and file rev hash
auto infoResult = store->get(KeySpace::HgProxyHashFamily, edenBlobHash);
if (!infoResult.isValid()) {
XLOG(ERR) << "received unknown mercurial proxy hash "
<< edenBlobHash.toString() << " in " << context;
// Fall through and let infoResult.extractValue() throw
}
value_ = infoResult.extractValue();
parseValue(edenBlobHash);
}
~HgProxyHash() {}
const RelativePathPiece& path() const {
return path_;
}
const Hash& revHash() const {
return revHash_;
}
static folly::Future<std::vector<std::pair<RelativePath, Hash>>> getBatch(
LocalStore* store,
const std::vector<Hash>& blobHashes) {
auto hashCopies = std::make_shared<std::vector<Hash>>(blobHashes);
std::vector<folly::ByteRange> byteRanges;
for (auto& hash : *hashCopies) {
byteRanges.push_back(hash.getBytes());
}
return store->getBatch(KeySpace::HgProxyHashFamily, byteRanges)
.then([blobHashes = hashCopies](std::vector<StoreResult>&& data) {
std::vector<std::pair<RelativePath, Hash>> results;
for (size_t i = 0; i < blobHashes->size(); ++i) {
HgProxyHash hgInfo(
blobHashes->at(i), data[i], "prefetchFiles getBatch");
results.emplace_back(hgInfo.path().copy(), hgInfo.revHash());
}
return results;
});
}
/**
* Store HgProxyHash data in the LocalStore.
*
* Returns an eden blob hash that can be used to retrieve the data later
* (using the HgProxyHash constructor defined above).
*/
static Hash store(
RelativePathPiece path,
Hash hgRevHash,
LocalStore::WriteBatch* writeBatch) {
auto computedPair = prepareToStore(path, hgRevHash);
HgProxyHash::store(computedPair, writeBatch);
return computedPair.first;
}
/**
* Compute the proxy hash information, but do not store it.
*
* This is useful when you need the proxy hash but don't want to commit
* the data until after you have written an associated data item.
* Returns the proxy hash and the data that should be written;
* the caller is responsible for passing the pair to the HgProxyHash::store()
* method below at the appropriate time.
*/
static std::pair<Hash, IOBuf> prepareToStore(
RelativePathPiece path,
Hash hgRevHash) {
// Serialize the (path, hgRevHash) tuple into a buffer.
auto buf = serialize(path, hgRevHash);
// Compute the hash of the serialized buffer
ByteRange serializedInfo = buf.coalesce();
auto edenBlobHash = Hash::sha1(serializedInfo);
return std::make_pair(edenBlobHash, std::move(buf));
}
/**
* Store precomputed proxy hash information.
* Stores the data computed by prepareToStore().
*/
static void store(
const std::pair<Hash, IOBuf>& computedPair,
LocalStore::WriteBatch* writeBatch) {
writeBatch->put(
KeySpace::HgProxyHashFamily,
computedPair.first,
// Note that this depends on prepareToStore() having called
// buf.coalesce()!
ByteRange(computedPair.second.data(), computedPair.second.length()));
}
private:
// Not movable or copyable.
// path_ points into value_, and would need to be updated after
// copying/moving the data. Since no-one needs to copy or move HgProxyHash
// objects, we don't implement this for now.
HgProxyHash(const HgProxyHash&) = delete;
HgProxyHash& operator=(const HgProxyHash&) = delete;
HgProxyHash(HgProxyHash&&) = delete;
HgProxyHash& operator=(HgProxyHash&&) = delete;
HgProxyHash(
Hash edenBlobHash,
StoreResult& infoResult,
folly::StringPiece context) {
if (!infoResult.isValid()) {
XLOG(ERR) << "received unknown mercurial proxy hash "
<< edenBlobHash.toString() << " in " << context;
// Fall through and let infoResult.extractValue() throw
}
value_ = infoResult.extractValue();
parseValue(edenBlobHash);
}
/**
* Serialize the (path, hgRevHash) data into a buffer that will be stored in
* the LocalStore.
*/
static IOBuf serialize(RelativePathPiece path, Hash hgRevHash) {
// We serialize the data as <hash_bytes><path_length><path>
//
// The path_length is stored as a big-endian uint32_t.
auto pathStr = path.stringPiece();
IOBuf buf(
IOBuf::CREATE, Hash::RAW_SIZE + sizeof(uint32_t) + pathStr.size());
Appender appender(&buf, 0);
appender.push(hgRevHash.getBytes());
appender.writeBE<uint32_t>(pathStr.size());
appender.push(pathStr);
return buf;
}
/**
* Parse the serialized data found in value_, and set revHash_ and path_.
*
* The value_ member variable should already contain the serialized data,
* (as returned by serialize()).
*
* Note that path_ will be set to a RelativePathPiece pointing into the
* string data owned by value_. (This lets us avoid copying the string data
* out.)
*/
void parseValue(Hash edenBlobHash) {
ByteRange infoBytes = StringPiece(value_);
// Make sure the data is long enough to contain the rev hash and path length
if (infoBytes.size() < Hash::RAW_SIZE + sizeof(uint32_t)) {
auto msg = folly::to<string>(
"mercurial blob info data for ",
edenBlobHash.toString(),
" is too short (",
infoBytes.size(),
" bytes)");
XLOG(ERR) << msg;
throw std::length_error(msg);
}
// Extract the revHash_
revHash_ = Hash(infoBytes.subpiece(0, Hash::RAW_SIZE));
infoBytes.advance(Hash::RAW_SIZE);
// Extract the path length
uint32_t pathLength;
memcpy(&pathLength, infoBytes.data(), sizeof(uint32_t));
pathLength = Endian::big(pathLength);
infoBytes.advance(sizeof(uint32_t));
// Make sure the path length agrees with the length of data remaining
if (infoBytes.size() != pathLength) {
auto msg = folly::to<string>(
"mercurial blob info data for ",
edenBlobHash.toString(),
" has inconsistent path length");
XLOG(ERR) << msg;
throw std::length_error(msg);
}
// Extract the path_
path_ = RelativePathPiece(StringPiece(infoBytes));
}
/**
* The serialized data.
*/
std::string value_;
/**
* The revision hash.
*/
Hash revHash_;
/**
* The path name. Note that this points into the serialized value_ data.
* path_ itself does not own the data it points to.
*/
RelativePathPiece path_;
};
/**
* Internal helper function for use by getImportHelperPath().
*

View File

@ -0,0 +1,169 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
#include "eden/fs/store/hg/HgProxyHash.h"
#include <folly/futures/Future.h>
#include <folly/io/Cursor.h>
#include <folly/io/IOBuf.h>
#include <folly/logging/xlog.h>
#include "eden/fs/store/LocalStore.h"
#include "eden/fs/store/StoreResult.h"
using folly::ByteRange;
using folly::Endian;
using folly::IOBuf;
using folly::StringPiece;
using folly::io::Appender;
using std::string;
using KeySpace = facebook::eden::LocalStore::KeySpace;
namespace facebook {
namespace eden {
/**
 * Look up edenBlobHash in the LocalStore and parse the record found there.
 *
 * The context string is only used in the error message that is logged when
 * the store has no valid entry for the hash.
 */
HgProxyHash::HgProxyHash(
    LocalStore* store,
    Hash edenBlobHash,
    StringPiece context) {
  auto lookup = store->get(KeySpace::HgProxyHashFamily, edenBlobHash);
  const bool found = lookup.isValid();
  if (!found) {
    XLOG(ERR) << "received unknown mercurial proxy hash "
              << edenBlobHash.toString() << " in " << context;
    // No early return: extractValue() below is expected to throw for an
    // invalid result.
  }
  value_ = lookup.extractValue();
  parseValue(edenBlobHash);
}
/**
 * Load the (path, revHash) records for a batch of eden blob hashes.
 *
 * The vector produced by the returned Future has one entry per element of
 * blobHashes, in the same order.
 */
folly::Future<std::vector<std::pair<RelativePath, Hash>>> HgProxyHash::getBatch(
    LocalStore* store,
    const std::vector<Hash>& blobHashes) {
  // The key ranges below point into this shared copy of the hashes, and the
  // continuation holds a reference to the same copy.
  auto hashCopies = std::make_shared<std::vector<Hash>>(blobHashes);
  std::vector<folly::ByteRange> byteRanges;
  // The final size is known up front; avoid repeated reallocation.
  byteRanges.reserve(hashCopies->size());
  for (auto& hash : *hashCopies) {
    byteRanges.push_back(hash.getBytes());
  }
  return store->getBatch(KeySpace::HgProxyHashFamily, byteRanges)
      .then([blobHashes = hashCopies](std::vector<StoreResult>&& data) {
        std::vector<std::pair<RelativePath, Hash>> results;
        results.reserve(blobHashes->size());
        for (size_t i = 0; i < blobHashes->size(); ++i) {
          // Use bounds-checked access on both vectors: if the store returned
          // fewer results than keys, fail with std::out_of_range rather than
          // reading past the end of data.
          HgProxyHash hgInfo(
              blobHashes->at(i), data.at(i), "prefetchFiles getBatch");
          // path() points into the temporary hgInfo, so take an owning copy.
          results.emplace_back(hgInfo.path().copy(), hgInfo.revHash());
        }
        return results;
      });
}
/**
 * Compute the proxy hash record for (path, hgRevHash) and add it to
 * writeBatch.  Returns the eden blob hash under which the record was written.
 */
Hash HgProxyHash::store(
    RelativePathPiece path,
    Hash hgRevHash,
    LocalStore::WriteBatch* writeBatch) {
  auto prepared = prepareToStore(path, hgRevHash);
  HgProxyHash::store(prepared, writeBatch);
  return prepared.first;
}
/**
 * Compute the proxy hash and the serialized record for (path, hgRevHash)
 * without writing anything to the LocalStore.
 */
std::pair<Hash, IOBuf> HgProxyHash::prepareToStore(
    RelativePathPiece path,
    Hash hgRevHash) {
  auto serialized = serialize(path, hgRevHash);
  // The two-argument store() overload depends on this coalesce() call having
  // been made on the buffer.
  ByteRange serializedBytes = serialized.coalesce();
  // The eden blob hash is the SHA-1 of the serialized record.
  auto proxyHash = Hash::sha1(serializedBytes);
  return std::make_pair(proxyHash, std::move(serialized));
}
void HgProxyHash::store(
const std::pair<Hash, IOBuf>& computedPair,
LocalStore::WriteBatch* writeBatch) {
writeBatch->put(
KeySpace::HgProxyHashFamily,
computedPair.first,
// Note that this depends on prepareToStore() having called
// buf.coalesce()!
ByteRange(computedPair.second.data(), computedPair.second.length()));
}
/**
 * Build an HgProxyHash from a lookup result that has already been fetched
 * from the LocalStore.  Used by getBatch().
 */
HgProxyHash::HgProxyHash(
    Hash edenBlobHash,
    StoreResult& infoResult,
    StringPiece context) {
  const bool found = infoResult.isValid();
  if (!found) {
    XLOG(ERR) << "received unknown mercurial proxy hash "
              << edenBlobHash.toString() << " in " << context;
    // No early return: extractValue() below is expected to throw for an
    // invalid result.
  }
  value_ = infoResult.extractValue();
  parseValue(edenBlobHash);
}
/**
 * Produce the serialized form of (path, hgRevHash) that is written to the
 * LocalStore.
 *
 * Layout: <hash_bytes><path_length><path>, where path_length is a big-endian
 * uint32_t.
 */
IOBuf HgProxyHash::serialize(RelativePathPiece path, Hash hgRevHash) {
  auto pathStr = path.stringPiece();
  const auto totalSize = Hash::RAW_SIZE + sizeof(uint32_t) + pathStr.size();

  IOBuf buf(IOBuf::CREATE, totalSize);
  // Growth of 0: the buffer was created with the full size computed above.
  Appender out(&buf, 0);
  out.push(hgRevHash.getBytes());
  out.writeBE<uint32_t>(static_cast<uint32_t>(pathStr.size()));
  out.push(pathStr);
  return buf;
}
/**
 * Decode value_ (which must already hold data in the serialize() layout) and
 * fill in revHash_ and path_.
 *
 * path_ is set to a piece that points into the bytes owned by value_.
 *
 * Throws std::length_error if the data is too short or if the stored path
 * length does not match the number of bytes that follow it.
 */
void HgProxyHash::parseValue(Hash edenBlobHash) {
  const auto headerSize = Hash::RAW_SIZE + sizeof(uint32_t);
  ByteRange remaining = StringPiece(value_);

  // The fixed-size header (rev hash + path length) must be present.
  if (remaining.size() < headerSize) {
    auto msg = folly::to<string>(
        "mercurial blob info data for ",
        edenBlobHash.toString(),
        " is too short (",
        remaining.size(),
        " bytes)");
    XLOG(ERR) << msg;
    throw std::length_error(msg);
  }

  // Rev hash
  revHash_ = Hash(remaining.subpiece(0, Hash::RAW_SIZE));
  remaining.advance(Hash::RAW_SIZE);

  // Path length (big-endian on disk)
  uint32_t rawPathLength;
  memcpy(&rawPathLength, remaining.data(), sizeof(uint32_t));
  const uint32_t pathLength = Endian::big(rawPathLength);
  remaining.advance(sizeof(uint32_t));

  // Everything left over must be exactly the path.
  if (remaining.size() != pathLength) {
    auto msg = folly::to<string>(
        "mercurial blob info data for ",
        edenBlobHash.toString(),
        " has inconsistent path length");
    XLOG(ERR) << msg;
    throw std::length_error(msg);
  }

  // Path
  path_ = RelativePathPiece(StringPiece(remaining));
}
} // namespace eden
} // namespace facebook

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
#pragma once
#include <string>
#include <vector>
#include "eden/fs/model/Hash.h"
#include "eden/fs/store/LocalStore.h"
#include "eden/fs/utils/PathFuncs.h"
namespace folly {
template <typename T>
class Future;
class IOBuf;
} // namespace folly
namespace facebook {
namespace eden {
/**
 * HgProxyHash reads and writes mercurial (path, revHash) records in the
 * LocalStore.
 *
 * Mercurial file revision hashes are only meaningful relative to a specific
 * path, so they cannot be used directly as eden blob hashes.  Instead, eden
 * uses the hash of the serialized (path, revHash) tuple as the blob hash,
 * and records the eden_blob_hash --> (path, hgRevHash) mapping in the
 * LocalStore.  This class is the helper for storing and loading that mapping.
 */
class HgProxyHash {
 public:
  /**
   * Look up edenBlobHash in the LocalStore and parse the record found there.
   *
   * The context string is used in the error message that is logged when the
   * store has no valid entry for the hash.
   */
  HgProxyHash(LocalStore* store, Hash edenBlobHash, folly::StringPiece context);

  ~HgProxyHash() = default;

  /**
   * The path portion of the record.  The returned piece refers to data owned
   * by this object.
   */
  const RelativePathPiece& path() const {
    return path_;
  }

  /**
   * The mercurial revision hash portion of the record.
   */
  const Hash& revHash() const {
    return revHash_;
  }

  /**
   * Load the (path, revHash) records for a batch of eden blob hashes from
   * the LocalStore.
   */
  static folly::Future<std::vector<std::pair<RelativePath, Hash>>> getBatch(
      LocalStore* store,
      const std::vector<Hash>& blobHashes);

  /**
   * Compute the proxy hash record for (path, hgRevHash) and add it to
   * writeBatch.
   *
   * Returns the eden blob hash for the record; passing that hash to the
   * loading constructor above retrieves the data again.
   */
  static Hash store(
      RelativePathPiece path,
      Hash hgRevHash,
      LocalStore::WriteBatch* writeBatch);

  /**
   * Compute the proxy hash record without writing it anywhere.
   *
   * Use this when the proxy hash is needed before the record should be
   * committed, e.g. because an associated data item has to be written first.
   * The result holds the proxy hash and the data to be written; the caller
   * is responsible for handing that pair to the store() overload below at
   * the appropriate time.
   */
  static std::pair<Hash, folly::IOBuf> prepareToStore(
      RelativePathPiece path,
      Hash hgRevHash);

  /**
   * Write a record previously produced by prepareToStore() to writeBatch.
   */
  static void store(
      const std::pair<Hash, folly::IOBuf>& computedPair,
      LocalStore::WriteBatch* writeBatch);

 private:
  // Copying and moving are disabled: path_ refers to bytes inside value_ and
  // would have to be re-pointed after either operation.  Nothing needs to
  // copy or move these objects, so that is simply not implemented.
  HgProxyHash(const HgProxyHash&) = delete;
  HgProxyHash& operator=(const HgProxyHash&) = delete;
  HgProxyHash(HgProxyHash&&) = delete;
  HgProxyHash& operator=(HgProxyHash&&) = delete;

  /**
   * Build an HgProxyHash from a lookup result that has already been fetched
   * from the LocalStore.
   */
  HgProxyHash(
      Hash edenBlobHash,
      StoreResult& infoResult,
      folly::StringPiece context);

  /**
   * Produce the serialized form of (path, hgRevHash) that is written to the
   * LocalStore.
   */
  static folly::IOBuf serialize(RelativePathPiece path, Hash hgRevHash);

  /**
   * Decode value_ and fill in revHash_ and path_.
   *
   * value_ must already hold serialized data in the format produced by
   * serialize().
   *
   * path_ is set to a RelativePathPiece that points into the string data
   * owned by value_, which avoids copying the path string out.
   */
  void parseValue(Hash edenBlobHash);

  /**
   * The serialized record, exactly as stored in the LocalStore.
   */
  std::string value_;

  /**
   * The mercurial revision hash decoded from value_.
   */
  Hash revHash_;

  /**
   * The path decoded from value_.  This is a non-owning piece that points
   * into the bytes held by value_.
   */
  RelativePathPiece path_;
};
} // namespace eden
} // namespace facebook