2018-02-07 22:45:41 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2016-present, Facebook, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This source code is licensed under the BSD-style license found in the
|
|
|
|
* LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
* of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
#include "eden/fs/store/RocksDbLocalStore.h"
|
2019-05-15 22:15:49 +03:00
|
|
|
|
|
|
|
#include <array>
|
|
|
|
|
2018-02-07 22:45:41 +03:00
|
|
|
#include <folly/Format.h>
|
|
|
|
#include <folly/String.h>
|
2018-05-10 04:41:22 +03:00
|
|
|
#include <folly/futures/Future.h>
|
2018-02-07 22:45:41 +03:00
|
|
|
#include <folly/io/Cursor.h>
|
|
|
|
#include <folly/io/IOBuf.h>
|
|
|
|
#include <folly/lang/Bits.h>
|
2018-05-01 07:20:51 +03:00
|
|
|
#include <folly/logging/xlog.h>
|
2019-04-15 22:26:00 +03:00
|
|
|
#include <rocksdb/convenience.h>
|
2018-02-07 22:45:41 +03:00
|
|
|
#include <rocksdb/db.h>
|
|
|
|
#include <rocksdb/filter_policy.h>
|
|
|
|
#include <rocksdb/table.h>
|
2019-05-15 22:15:49 +03:00
|
|
|
|
2018-02-07 22:45:41 +03:00
|
|
|
#include "eden/fs/rocksdb/RocksException.h"
|
|
|
|
#include "eden/fs/rocksdb/RocksHandles.h"
|
2019-05-15 22:15:49 +03:00
|
|
|
#include "eden/fs/store/KeySpaces.h"
|
2018-02-07 22:45:41 +03:00
|
|
|
#include "eden/fs/store/StoreResult.h"
|
2019-03-13 05:25:54 +03:00
|
|
|
#include "eden/fs/utils/FaultInjector.h"
|
2018-02-07 22:45:41 +03:00
|
|
|
|
|
|
|
using facebook::eden::Hash;
|
|
|
|
using folly::ByteRange;
|
|
|
|
using folly::IOBuf;
|
|
|
|
using folly::StringPiece;
|
|
|
|
using folly::io::Cursor;
|
2019-03-18 21:33:22 +03:00
|
|
|
using rocksdb::FlushOptions;
|
2018-02-07 22:45:41 +03:00
|
|
|
using rocksdb::ReadOptions;
|
|
|
|
using rocksdb::Slice;
|
|
|
|
using rocksdb::SliceParts;
|
|
|
|
using rocksdb::WriteBatch;
|
|
|
|
using rocksdb::WriteOptions;
|
|
|
|
using std::string;
|
|
|
|
using std::unique_ptr;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
using namespace facebook::eden;
|
|
|
|
|
|
|
|
/**
 * Build the ColumnFamilyOptions used for the column families in this store.
 *
 * @param LRUblockCacheSizeMB value forwarded to
 *     ColumnFamilyOptions::OptimizeForPointLookup().
 * @return the configured options, by value.
 */
rocksdb::ColumnFamilyOptions makeColumnOptions(uint64_t LRUblockCacheSizeMB) {
  rocksdb::ColumnFamilyOptions columnOptions;

  // None of the keys we store are ever range-scanned, so tune for point
  // lookups: this enables bloom filters and a hash policy that improves our
  // get/put performance.
  columnOptions.OptimizeForPointLookup(LRUblockCacheSizeMB);

  columnOptions.OptimizeLevelStyleCompaction();

  return columnOptions;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The different key spaces that we desire.
|
|
|
|
* The ordering is coupled with the values of the LocalStore::KeySpace enum.
|
|
|
|
*/
|
|
|
|
const std::vector<rocksdb::ColumnFamilyDescriptor>& columnFamilies() {
|
2019-05-15 22:15:49 +03:00
|
|
|
auto makeColumnFamilyDescriptors = [] {
|
|
|
|
// Most of the column families will share the same cache. We
|
|
|
|
// want the blob data to live in its own smaller cache; the assumption
|
|
|
|
// is that the vfs cache will compensate for that, together with the
|
|
|
|
// idea that we shouldn't need to materialize a great many files.
|
|
|
|
auto options = makeColumnOptions(64);
|
|
|
|
auto blobOptions = makeColumnOptions(8);
|
2018-02-07 22:45:41 +03:00
|
|
|
|
2019-05-15 22:15:49 +03:00
|
|
|
// Meyers singleton to avoid SIOF issues
|
|
|
|
std::vector<rocksdb::ColumnFamilyDescriptor> families;
|
|
|
|
for (size_t ks = 0; ks < kKeySpaceRecords.size(); ++ks) {
|
|
|
|
families.emplace_back(
|
|
|
|
kKeySpaceRecords[ks].name.str(),
|
|
|
|
(ks == LocalStore::BlobFamily) ? blobOptions : options);
|
|
|
|
}
|
|
|
|
// Put the default column family last.
|
|
|
|
// This way the KeySpace enum values can be used directly as indexes
|
|
|
|
// into our column family vectors.
|
|
|
|
families.emplace_back(rocksdb::kDefaultColumnFamilyName, options);
|
|
|
|
return families;
|
2018-02-07 22:45:41 +03:00
|
|
|
};
|
2019-05-15 22:15:49 +03:00
|
|
|
|
|
|
|
// Meyers singleton to avoid SIOF issues
|
|
|
|
static const std::vector<rocksdb::ColumnFamilyDescriptor> families =
|
|
|
|
makeColumnFamilyDescriptors();
|
2018-02-07 22:45:41 +03:00
|
|
|
return families;
|
|
|
|
}
|
|
|
|
|
2019-05-15 22:15:49 +03:00
|
|
|
/**
|
|
|
|
* Return a rocksdb::Range that contains all possible keys that we store.
|
|
|
|
*
|
|
|
|
* The input string will be used to store data for the Range slices.
|
|
|
|
* The caller must ensure that the rangeStorage parameter remains valid and
|
|
|
|
* unmodified until they are done using the returned Range.
|
|
|
|
*/
|
|
|
|
rocksdb::Range getFullRange(std::string& rangeStorage) {
|
|
|
|
// An empty slice is the lowest possible value.
|
|
|
|
Slice begin;
|
|
|
|
// All of our keys are currently 20 bytes.
|
|
|
|
// Use a longer key to ensure that this is greater than any valid key.
|
|
|
|
rangeStorage = std::string(
|
|
|
|
21, static_cast<char>(std::numeric_limits<unsigned char>::max()));
|
|
|
|
Slice end(rangeStorage);
|
|
|
|
return rocksdb::Range(begin, end);
|
|
|
|
}
|
|
|
|
|
2018-02-07 22:45:41 +03:00
|
|
|
/**
 * Wrap a folly::ByteRange in a rocksdb::Slice.
 *
 * The slice is built from the range's data pointer and size, so it refers to
 * the same bytes as the input range.
 */
rocksdb::Slice _createSlice(folly::ByteRange bytes) {
  const auto* chars = reinterpret_cast<const char*>(bytes.data());
  const auto length = bytes.size();
  return Slice(chars, length);
}
|
|
|
|
|
|
|
|
class RocksDbWriteBatch : public LocalStore::WriteBatch {
|
|
|
|
public:
|
|
|
|
void put(
|
|
|
|
LocalStore::KeySpace keySpace,
|
|
|
|
folly::ByteRange key,
|
|
|
|
folly::ByteRange value) override;
|
|
|
|
void put(
|
|
|
|
LocalStore::KeySpace keySpace,
|
|
|
|
folly::ByteRange key,
|
|
|
|
std::vector<folly::ByteRange> valueSlices) override;
|
|
|
|
void flush() override;
|
2019-02-14 23:42:47 +03:00
|
|
|
~RocksDbWriteBatch() override;
|
2018-02-07 22:45:41 +03:00
|
|
|
// Use LocalStore::beginWrite() to create a write batch
|
|
|
|
RocksDbWriteBatch(RocksHandles& dbHandles, size_t bufferSize);
|
|
|
|
|
|
|
|
void flushIfNeeded();
|
|
|
|
|
|
|
|
RocksHandles& dbHandles_;
|
|
|
|
rocksdb::WriteBatch writeBatch_;
|
|
|
|
size_t bufSize_;
|
|
|
|
};
|
|
|
|
|
|
|
|
void RocksDbWriteBatch::flush() {
|
|
|
|
auto pending = writeBatch_.Count();
|
|
|
|
if (pending == 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
XLOG(DBG5) << "Flushing " << pending << " entries with data size of "
|
|
|
|
<< writeBatch_.GetDataSize();
|
|
|
|
|
|
|
|
auto status = dbHandles_.db->Write(WriteOptions(), &writeBatch_);
|
|
|
|
XLOG(DBG5) << "... Flushed";
|
|
|
|
|
|
|
|
if (!status.ok()) {
|
|
|
|
throw RocksException::build(
|
|
|
|
status, "error putting blob batch in local store");
|
|
|
|
}
|
|
|
|
|
|
|
|
writeBatch_.Clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
void RocksDbWriteBatch::flushIfNeeded() {
|
|
|
|
auto needFlush = bufSize_ > 0 && writeBatch_.GetDataSize() >= bufSize_;
|
|
|
|
|
|
|
|
if (needFlush) {
|
|
|
|
flush();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
RocksDbWriteBatch::RocksDbWriteBatch(RocksHandles& dbHandles, size_t bufSize)
|
|
|
|
: LocalStore::WriteBatch(),
|
|
|
|
dbHandles_(dbHandles),
|
|
|
|
writeBatch_(bufSize),
|
|
|
|
bufSize_(bufSize) {}
|
|
|
|
|
|
|
|
/**
 * Log an error if the batch is destroyed while entries are still queued.
 * The queued entries are not written.
 */
RocksDbWriteBatch::~RocksDbWriteBatch() {
  const auto unflushed = writeBatch_.Count();
  if (unflushed <= 0) {
    return;
  }
  XLOG(ERR) << "WriteBatch being destroyed with " << unflushed
            << " items pending flush";
}
|
|
|
|
|
|
|
|
/**
 * Queue a single key/value pair in the column family for keySpace, then
 * flush if the buffer threshold has been reached.
 */
void RocksDbWriteBatch::put(
    LocalStore::KeySpace keySpace,
    folly::ByteRange key,
    folly::ByteRange value) {
  auto* column = dbHandles_.columns[keySpace].get();
  const auto keySlice = _createSlice(key);
  const auto valueSlice = _createSlice(value);

  writeBatch_.Put(column, keySlice, valueSlice);

  flushIfNeeded();
}
|
|
|
|
|
|
|
|
/**
 * Queue a key whose value is the concatenation of several byte ranges, then
 * flush if the buffer threshold has been reached.
 *
 * @param keySpace selects the column family to write to.
 * @param key the key bytes.
 * @param valueSlices the pieces of the value, in order; passed to RocksDB as
 *     a SliceParts so they do not need to be joined by the caller.
 */
void RocksDbWriteBatch::put(
    LocalStore::KeySpace keySpace,
    folly::ByteRange key,
    std::vector<folly::ByteRange> valueSlices) {
  // The final size is known up front, so allocate once instead of growing
  // the vector while converting.
  std::vector<Slice> slices;
  slices.reserve(valueSlices.size());
  for (const auto& valueSlice : valueSlices) {
    slices.emplace_back(_createSlice(valueSlice));
  }

  // The SliceParts overload of Put() takes the key as parts too, so wrap the
  // single key slice in a one-element SliceParts.
  auto keySlice = _createSlice(key);
  SliceParts keyParts(&keySlice, 1);
  writeBatch_.Put(
      dbHandles_.columns[keySpace].get(),
      keyParts,
      SliceParts(slices.data(), slices.size()));

  flushIfNeeded();
}
|
|
|
|
|
2019-04-25 05:59:48 +03:00
|
|
|
/**
 * Build the database-wide rocksdb::Options used when opening or repairing
 * the store.
 */
rocksdb::Options getRocksdbOptions() {
  rocksdb::Options dbOptions;

  // Create the DB if it's not already present, and automatically create
  // column families as we define new ones.
  dbOptions.create_if_missing = true;
  dbOptions.create_missing_column_families = true;

  // Optimize RocksDB.  This is the easiest way to get RocksDB to perform
  // well.
  dbOptions.IncreaseParallelism();

  return dbOptions;
}
|
|
|
|
|
2019-05-21 00:07:33 +03:00
|
|
|
/**
 * Open the RocksDB database at path, repairing it if the first open fails.
 *
 * If constructing the RocksHandles throws a RocksException the error is
 * logged.  In read-only mode the exception is then rethrown; otherwise
 * RocksDbLocalStore::repairDB() is run and the open is attempted once more
 * (a failure of that second attempt propagates to the caller).
 */
RocksHandles openDB(AbsolutePathPiece path, RocksDBOpenMode mode) {
  auto options = getRocksdbOptions();
  const auto openHandles = [&] {
    return RocksHandles(path.stringPiece(), mode, options, columnFamilies());
  };

  try {
    return openHandles();
  } catch (const RocksException& ex) {
    XLOG(ERR) << "Error opening RocksDB storage at " << path << ": "
              << ex.what();
    // In read-only mode fail rather than attempting to repair the DB.
    if (mode == RocksDBOpenMode::ReadOnly) {
      throw;
    }
  }

  // The first attempt failed in a writable mode: repair, then retry.
  RocksDbLocalStore::repairDB(path);
  return openHandles();
}
|
|
|
|
|
2018-02-07 22:45:41 +03:00
|
|
|
} // namespace
|
|
|
|
|
|
|
|
namespace facebook {
|
|
|
|
namespace eden {
|
|
|
|
|
2018-11-09 22:20:16 +03:00
|
|
|
RocksDbLocalStore::RocksDbLocalStore(
|
|
|
|
AbsolutePathPiece pathToRocksDb,
|
2019-05-21 00:07:33 +03:00
|
|
|
FaultInjector* faultInjector,
|
|
|
|
RocksDBOpenMode mode)
|
2019-05-15 22:15:49 +03:00
|
|
|
: faultInjector_(*faultInjector),
|
2019-05-21 00:07:33 +03:00
|
|
|
dbHandles_(openDB(pathToRocksDb, mode)),
|
2019-04-25 05:59:48 +03:00
|
|
|
ioPool_(12, "RocksLocalStore") {}
|
2018-02-07 22:45:41 +03:00
|
|
|
|
|
|
|
/**
 * Destructor.  Only does explicit work in address-sanitizer builds.
 */
RocksDbLocalStore::~RocksDbLocalStore() {
#ifdef FOLLY_SANITIZE_ADDRESS
  // RocksDB has race conditions around starting up and tearing down its
  // database maintenance threads.  Our test harness hits this, particularly
  // in a test that mounts and then quickly unmounts, as an abort with the
  // message:
  // "pthread lock: Invalid Argument".
  // The assumption is that we shut things down before rocks has finished
  // initializing.  Sleeping here makes it more likely that rocks is past
  // that critical point, so that we can shutdown successfully.
  /* sleep override */ sleep(1);
#endif
}
|
|
|
|
|
|
|
|
void RocksDbLocalStore::close() {
|
2019-04-25 05:59:48 +03:00
|
|
|
dbHandles_.close();
|
2018-02-07 22:45:41 +03:00
|
|
|
}
|
|
|
|
|
2019-04-25 05:59:48 +03:00
|
|
|
/**
 * Run RocksDB's RepairDB() on the database at path.
 *
 * The known column families are described by columnFamilies(); any other
 * column family found in the database uses a separately built option set.
 * Throws the exception produced by RocksException::build() if the repair
 * does not succeed.
 */
void RocksDbLocalStore::repairDB(AbsolutePathPiece path) {
  XLOG(ERR) << "Attempting to repair RocksDB " << path;

  // Options applied to column families not listed in columnFamilies().
  rocksdb::ColumnFamilyOptions unknownColumnFamilyOptions;
  unknownColumnFamilyOptions.OptimizeForPointLookup(8);
  unknownColumnFamilyOptions.OptimizeLevelStyleCompaction();

  const auto& knownFamilies = columnFamilies();
  const auto dbPath = path.stringPiece().str();
  rocksdb::DBOptions repairOptions(getRocksdbOptions());

  const auto repairStatus = RepairDB(
      dbPath, repairOptions, knownFamilies, unknownColumnFamilyOptions);
  if (repairStatus.ok()) {
    return;
  }
  throw RocksException::build(
      repairStatus, "unable to repair RocksDB at ", path);
}
|
|
|
|
|
2018-05-31 20:39:49 +03:00
|
|
|
/**
 * Delete all data stored in the column family for keySpace.
 *
 * Throws the exception produced by RocksException::build() if either of the
 * two delete operations reports a failure.
 */
void RocksDbLocalStore::clearKeySpace(KeySpace keySpace) {
  auto columnFamily = dbHandles_.columns[keySpace].get();
  XLOG(DBG2) << "clearing column family \"" << columnFamily->GetName() << "\"";

  // rangeStorage backs the upper bound of fullRange and must outlive it.
  std::string rangeStorage;
  const auto fullRange = getFullRange(rangeStorage);

  // Both delete steps report failure the same way.
  const auto throwOnFailure = [columnFamily](const auto& status) {
    if (!status.ok()) {
      throw RocksException::build(
          status,
          "error deleting data in \"",
          columnFamily->GetName(),
          "\" column family");
    }
  };

  // Delete all SST files that only contain keys in the specified range.
  // Since we are deleting everything in this column family this should
  // effectively delete everything.
  throwOnFailure(DeleteFilesInRange(
      dbHandles_.db.get(), columnFamily, &fullRange.start, &fullRange.limit));

  // Call DeleteRange() as well.  In theory DeleteFilesInRange may not delete
  // everything in the range (but it probably will in our case since we are
  // intending to delete everything).
  const WriteOptions writeOptions;
  throwOnFailure(dbHandles_.db->DeleteRange(
      writeOptions, columnFamily, fullRange.start, fullRange.limit));
}
|
|
|
|
|
2018-08-10 21:09:48 +03:00
|
|
|
/**
 * Compact the entire column family for keySpace.
 *
 * Null begin/end bounds are passed to CompactRange(), and write stalls are
 * allowed while it runs.  Throws the exception produced by
 * RocksException::build() if compaction reports a failure.
 */
void RocksDbLocalStore::compactKeySpace(KeySpace keySpace) {
  rocksdb::CompactRangeOptions compactOptions{};
  compactOptions.allow_write_stall = true;

  auto* family = dbHandles_.columns[keySpace].get();
  XLOG(DBG2) << "compacting column family \"" << family->GetName() << "\"";

  const auto compactStatus = dbHandles_.db->CompactRange(
      compactOptions, family, /*begin=*/nullptr, /*end=*/nullptr);
  if (compactStatus.ok()) {
    return;
  }
  throw RocksException::build(
      compactStatus,
      "error compacting \"",
      family->GetName(),
      "\" column family");
}
|
|
|
|
|
2018-02-07 22:45:41 +03:00
|
|
|
/**
 * Look up key in the column family for keySpace.
 *
 * @return a StoreResult holding the value, or an empty StoreResult when
 *     RocksDB reports the key as not found.
 * Any other failure throws the exception produced by RocksException::build().
 */
StoreResult RocksDbLocalStore::get(LocalStore::KeySpace keySpace, ByteRange key)
    const {
  string data;
  auto* column = dbHandles_.columns[keySpace].get();
  const auto lookupStatus =
      dbHandles_.db->Get(ReadOptions(), column, _createSlice(key), &data);

  if (lookupStatus.ok()) {
    return StoreResult(std::move(data));
  }
  if (lookupStatus.IsNotFound()) {
    // Return an empty StoreResult
    return StoreResult();
  }

  // TODO: RocksDB can return a "TryAgain" error.
  // Should we try again for the user, rather than re-throwing the error?

  // RocksException::check() is deliberately not used: it would make us
  // compute the hex string of the key even when the lookup succeeded.
  throw RocksException::build(
      lookupStatus, "failed to get ", folly::hexlify(key), " from local store");
}
|
|
|
|
|
2018-05-10 04:41:22 +03:00
|
|
|
/**
 * Asynchronous variant of get(): after the "local store get single" fault
 * check, the lookup is performed on ioPool_.
 */
FOLLY_NODISCARD folly::Future<StoreResult> RocksDbLocalStore::getFuture(
    KeySpace keySpace,
    folly::ByteRange key) const {
  // The key is simply forwarded to get(), but the continuation needs its own
  // copy of the bytes.  It will usually be an eden::Hash but can potentially
  // be an arbitrary length, so Hash cannot be used as the storage.
  // std::string is appropriate, at the price of some signed/unsigned
  // conversion noise in both directions.
  std::string ownedKey(reinterpret_cast<const char*>(key.data()), key.size());

  return faultInjector_.checkAsync("local store get single", "")
      .via(&ioPool_)
      .thenValue([keySpace, key = std::move(ownedKey), this](folly::Unit&&) {
        const auto* bytes = reinterpret_cast<const unsigned char*>(key.data());
        return get(keySpace, folly::ByteRange(bytes, key.size()));
      });
}
|
|
|
|
|
2018-05-25 23:47:54 +03:00
|
|
|
/**
 * Asynchronously look up many keys in the column family for keySpace.
 *
 * The keys are copied into owned batches of at most kMaxKeysPerBatch
 * entries.  Each batch is fetched with a single MultiGet() call on ioPool_,
 * after the "local store get batch" fault check.
 *
 * @return a future holding one StoreResult per input key, in input order.
 *     Keys that RocksDB reports as not found yield an empty StoreResult.
 *     Any other per-key failure throws the exception produced by
 *     RocksException::build() inside the batch's continuation.
 */
FOLLY_NODISCARD folly::Future<std::vector<StoreResult>>
RocksDbLocalStore::getBatch(
    KeySpace keySpace,
    const std::vector<folly::ByteRange>& keys) const {
  // Upper bound on the number of keys passed to one MultiGet() call.
  constexpr size_t kMaxKeysPerBatch = 2048;

  // The lookups run after this function returns, so every batch owns copies
  // of its key bytes.  At least one (possibly empty) batch is always created.
  using KeyBatch = std::vector<std::string>;
  std::vector<std::shared_ptr<KeyBatch>> batches;
  batches.reserve(keys.size() / kMaxKeysPerBatch + 1);
  batches.emplace_back(std::make_shared<KeyBatch>());

  for (const auto& key : keys) {
    if (batches.back()->size() >= kMaxKeysPerBatch) {
      batches.emplace_back(std::make_shared<KeyBatch>());
    }
    batches.back()->emplace_back(
        reinterpret_cast<const char*>(key.data()), key.size());
  }

  std::vector<folly::Future<std::vector<StoreResult>>> futures;
  futures.reserve(batches.size());

  for (auto& batch : batches) {
    futures.emplace_back(
        faultInjector_.checkAsync("local store get batch", "")
            .via(&ioPool_)
            .thenValue(
                [this, keySpace, keys = std::move(batch)](folly::Unit&&) {
                  XLOG(DBG3) << __func__ << " starting to actually do work";

                  const size_t numKeys = keys->size();

                  std::vector<Slice> keySlices;
                  keySlices.reserve(numKeys);
                  for (const auto& key : *keys) {
                    keySlices.emplace_back(key);
                  }

                  // Every key in the batch is read from the same column
                  // family, so look the handle up once.
                  std::vector<rocksdb::ColumnFamilyHandle*> columns(
                      numKeys, dbHandles_.columns[keySpace].get());

                  std::vector<std::string> values;
                  auto statuses = dbHandles_.db->MultiGet(
                      ReadOptions(), columns, keySlices, &values);

                  std::vector<StoreResult> results;
                  results.reserve(numKeys);
                  for (size_t i = 0; i < numKeys; ++i) {
                    const auto& status = statuses[i];
                    if (status.ok()) {
                      results.emplace_back(std::move(values[i]));
                      continue;
                    }
                    if (status.IsNotFound()) {
                      // Return an empty StoreResult
                      results.emplace_back();
                      continue;
                    }

                    // TODO: RocksDB can return a "TryAgain" error.
                    // Should we try again for the user, rather than
                    // re-throwing the error?

                    // We don't use RocksException::check(), since we don't
                    // want to waste our time computing the hex string of the
                    // key if we succeeded.
                    throw RocksException::build(
                        status,
                        "failed to get ",
                        folly::hexlify(keys->at(i)),
                        " from local store");
                  }
                  return results;
                }));
  }

  // Concatenate the per-batch results, preserving batch order.
  return folly::collect(futures).thenValue(
      [](std::vector<std::vector<StoreResult>>&& tries) {
        size_t total = 0;
        for (const auto& batch : tries) {
          total += batch.size();
        }

        std::vector<StoreResult> results;
        results.reserve(total);
        for (auto& batch : tries) {
          results.insert(
              results.end(),
              std::make_move_iterator(batch.begin()),
              std::make_move_iterator(batch.end()));
        }
        return results;
      });
}
|
|
|
|
|
2018-02-07 22:45:41 +03:00
|
|
|
/**
 * Report whether key is present in the column family for keySpace.
 *
 * @return true if the lookup succeeds, false if RocksDB reports not-found.
 * Any other failure throws the exception produced by RocksException::build().
 */
bool RocksDbLocalStore::hasKey(
    LocalStore::KeySpace keySpace,
    folly::ByteRange key) const {
  // Get() needs somewhere to put the value even though only the status is
  // examined here.
  string ignoredValue;
  auto* column = dbHandles_.columns[keySpace].get();
  const auto lookupStatus = dbHandles_.db->Get(
      ReadOptions(), column, _createSlice(key), &ignoredValue);

  if (lookupStatus.ok()) {
    return true;
  }
  if (lookupStatus.IsNotFound()) {
    return false;
  }

  // TODO: RocksDB can return a "TryAgain" error.
  // Should we try again for the user, rather than re-throwing the error?

  // RocksException::check() is deliberately not used: it would make us
  // compute the hex string of the key even when the lookup succeeded.
  throw RocksException::build(
      lookupStatus, "failed to get ", folly::hexlify(key), " from local store");
}
|
|
|
|
|
|
|
|
std::unique_ptr<LocalStore::WriteBatch> RocksDbLocalStore::beginWrite(
|
|
|
|
size_t bufSize) {
|
|
|
|
return std::make_unique<RocksDbWriteBatch>(dbHandles_, bufSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Write a single key/value pair to the column family for keySpace.
 *
 * The status returned by RocksDB is checked; a failed write throws the
 * exception produced by RocksException::build() instead of being silently
 * dropped.
 */
void RocksDbLocalStore::put(
    LocalStore::KeySpace keySpace,
    folly::ByteRange key,
    folly::ByteRange value) {
  const auto status = dbHandles_.db->Put(
      WriteOptions(),
      dbHandles_.columns[keySpace].get(),
      _createSlice(key),
      _createSlice(value));
  if (!status.ok()) {
    // Build the hex string only on the failure path.
    throw RocksException::build(
        status, "failed to put ", folly::hexlify(key), " in local store");
  }
}
|
|
|
|
|
2019-05-15 22:15:49 +03:00
|
|
|
/**
 * Estimate the space used by the column family for keySpace.
 *
 * The result is the sum of the live SST file size and the memtable size as
 * reported by RocksDB.  A property that cannot be retrieved is logged as a
 * warning and contributes nothing to the total.
 */
uint64_t RocksDbLocalStore::getApproximateSize(
    LocalStore::KeySpace keySpace) const {
  auto* family = dbHandles_.columns[keySpace].get();
  uint64_t total = 0;

  // kLiveSstFilesSize reports the size of all "live" sst files.  That
  // excludes sst files from older snapshot versions that RocksDB may still be
  // holding onto, e.g. to provide a consistent view to iterators.
  // kTotalSstFilesSize would report the size of all sst files if we wanted to
  // report that instead.
  uint64_t liveSstBytes;
  if (dbHandles_.db->GetIntProperty(
          family, rocksdb::DB::Properties::kLiveSstFilesSize, &liveSstBytes)) {
    total += liveSstBytes;
  } else {
    XLOG(WARN) << "unable to retrieve SST file size from RocksDB for key space "
               << family->GetName();
  }

  // kSizeAllMemTables reports the size of the memtables: the in-memory space
  // for tracking the data in *.log files that have not yet been compacted
  // into a .sst file.
  //
  // It is used here as a rough stand-in for the size of the *.log files.  In
  // practice it generally seems to be a fair amount smaller than the on-disk
  // *.log file size, except immediately after a compaction, when there is
  // still a couple MB of in-memory metadata despite having no uncompacted
  // on-disk data.
  uint64_t memtableBytes;
  if (dbHandles_.db->GetIntProperty(
          family, rocksdb::DB::Properties::kSizeAllMemTables, &memtableBytes)) {
    total += memtableBytes;
  } else {
    XLOG(WARN) << "unable to retrieve memtable size from RocksDB for key space "
               << family->GetName();
  }

  return total;
}
|
|
|
|
|
2018-02-07 22:45:41 +03:00
|
|
|
} // namespace eden
|
|
|
|
} // namespace facebook
|