sapling/eden/fs/takeover/TakeoverClient.cpp
Genevieve Helsel 9944a5dff5 add EdenServer recovery step and recover after failed takeover data send handshake
Summary:
* This adds an `EdenServer::recover()` method to start back up on an unsuccessful takeover data send.
    * On an unsuccessful ping, fulfill the `shutdownPromise` with a `TakeoverSendError` containing the constructed `TakeoverData`. After this `recover` function is called, `takeoverPromise_` is reset, `takeoverShutdown` is set to `false`, and the `runningState_` is set to `RUNNING`.
By taking over from the returned `TakeoverData`, the user will not encounter `Transport not connected` errors on recovery.

* This adds an `EdenServer::closeStorage()` method to defer closing the `backingStore_` and `localStore_` until after our ready handshake is successful.
* This defers the shutdown of the `PrivHelper` until a successful ready handshake.

I also update the takeover documentation here with the new logic (and fix some formatting issues)

Reviewed By: simpkins

Differential Revision: D20433433

fbshipit-source-id: f59e660922674d281957e80aee5049735b901a2c
2020-04-07 09:52:21 -07:00

120 lines
4.2 KiB
C++

/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/fs/takeover/TakeoverClient.h"
#include <folly/io/Cursor.h>
#include <folly/io/async/EventBase.h>
#include <folly/logging/xlog.h>
#include <thrift/lib/cpp2/protocol/Serializer.h>
#include "eden/fs/takeover/TakeoverData.h"
#include "eden/fs/takeover/gen-cpp2/takeover_types.h"
#include "eden/fs/utils/FutureUnixSocket.h"
using apache::thrift::CompactSerializer;
using std::string;
/**
 * Timeout, in seconds, used by takeoverMounts() for each receive() call on
 * the takeover socket (the value is wrapped in std::chrono::seconds at the
 * point of use).
 *
 * Five minutes is a high default value. This could be lowered back to one
 * minute after takeover no longer does O(loaded) expensive save operations.
 */
DEFINE_int32(
takeoverReceiveTimeout,
300,
"Timeout for receiving takeover data from old process in seconds");
namespace facebook {
namespace eden {
/**
 * Connect to the takeover socket at socketPath and receive takeover data from
 * the process on the other end (the old process being taken over from).
 *
 * The exchange is driven on a local EventBase that is looped on the calling
 * thread, so this function does not return until the exchange has either
 * completed or failed.
 *
 * Protocol, as implemented below:
 *   1. Connect (1 second connect timeout) and send a TakeoverVersionQuery
 *      containing supportedVersions.
 *   2. Wait up to FLAGS_takeoverReceiveTimeout seconds for a message.
 *   3. If that message is a "ready" ping:
 *        - when shouldPing is true, reply with an empty message and wait
 *          (same timeout) for the takeover data;
 *        - when shouldPing is false, fail with std::runtime_error.
 *      Otherwise the message is treated as the takeover data itself.
 *   4. Deserialize the data and attach the received file descriptors.
 *
 * @param socketPath         Path of the unix socket to connect to.
 * @param shouldPing         Whether to answer a "ready" ping from the server.
 * @param supportedVersions  Protocol versions advertised to the server.
 *
 * @return The deserialized TakeoverData with lockFile, thriftSocket and each
 *         mount point's fuseFD populated from the received file descriptors.
 *
 * @throws Whatever error was captured from the connect/send/receive chain
 *         (logged, then rethrown); std::runtime_error if the number of
 *         received file descriptors does not equal the number of mount
 *         points plus two. Exceptions thrown by TakeoverData::deserialize()
 *         are not caught here.
 */
TakeoverData takeoverMounts(
    AbsolutePathPiece socketPath,
    bool shouldPing,
    const std::set<int32_t>& supportedVersions) {
  folly::EventBase evb;
  // Holds either the final takeover message or the error that ended the
  // exchange; filled in by the last two continuations of the chain below.
  folly::Expected<UnixSocket::Message, folly::exception_wrapper>
      expectedMessage;

  auto connectTimeout = std::chrono::seconds(1);
  FutureUnixSocket socket;
  // The lambdas capture locals by reference. That is safe here because the
  // whole chain runs inside evb.loop() below, before this function returns.
  socket.connect(&evb, socketPath.stringPiece(), connectTimeout)
      .thenValue([&socket, supportedVersions](auto&&) {
        // Send our protocol version so that the server knows
        // whether we're capable of handshaking successfully
        TakeoverVersionQuery query;
        query.versions = supportedVersions;
        return socket.send(
            CompactSerializer::serialize<folly::IOBufQueue>(query).move());
      })
      .thenValue([&socket](auto&&) {
        // Wait for a response. This will either be a "ready" ping or the
        // takeover data, depending on the server protocol.
        auto timeout = std::chrono::seconds(FLAGS_takeoverReceiveTimeout);
        return socket.receive(timeout);
      })
      .thenValue([&socket, shouldPing](UnixSocket::Message&& msg) {
        if (TakeoverData::isPing(&msg.data)) {
          if (shouldPing) {
            // Just send an empty message back here, the server knows it sent
            // a ping so it does not need to parse the message.
            UnixSocket::Message ping;
            return socket.send(std::move(ping)).thenValue([&socket](auto&&) {
              // Wait for the takeover data response
              auto timeout =
                  std::chrono::seconds(FLAGS_takeoverReceiveTimeout);
              return socket.receive(timeout);
            });
          } else {
            // This should only be hit during integration tests.
            return folly::makeFuture<UnixSocket::Message>(
                folly::exception_wrapper(std::runtime_error(
                    "ping received but should not respond")));
          }
        } else {
          // Older versions of EdenFS will not send a "ready" ping and
          // could simply send the takeover data.
          return folly::makeFuture<UnixSocket::Message>(std::move(msg));
        }
      })
      .thenValue([&expectedMessage](UnixSocket::Message&& msg) {
        expectedMessage = std::move(msg);
      })
      .thenError([&expectedMessage](folly::exception_wrapper&& ew) {
        expectedMessage = folly::makeUnexpected(std::move(ew));
      })
      // Stop the event loop whether the chain succeeded or failed.
      .ensure([&evb] { evb.terminateLoopSoon(); });

  evb.loop();

  if (!expectedMessage) {
    XLOG(ERR) << "error receiving takeover data: " << expectedMessage.error();
    expectedMessage.error().throw_exception();
  }

  auto& message = expectedMessage.value();
  auto data = TakeoverData::deserialize(&message.data);

  // Add 2 here for the lock file and the thrift socket. Passing this check
  // also guarantees files[0] and files[1] exist for the moves below.
  if (data.mountPoints.size() + 2 != message.files.size()) {
    throw std::runtime_error(folly::to<string>(
        "received ",
        data.mountPoints.size(),
        " mount paths, but ",
        message.files.size(),
        " FDs (including the lock file and thrift socket FDs)"));
  }

  // FD layout: [0] lock file, [1] thrift socket, [2..] one FUSE FD per mount
  // point, in the same order as data.mountPoints.
  data.lockFile = std::move(message.files[0]);
  data.thriftSocket = std::move(message.files[1]);
  for (size_t n = 0; n < data.mountPoints.size(); ++n) {
    auto& mountInfo = data.mountPoints[n];
    mountInfo.fuseFD = std::move(message.files[n + 2]);
  }
  return data;
}
} // namespace eden
} // namespace facebook