sapling/eden/fs/config/CheckoutConfig.cpp
Xavier Deguillard 8853701e91 path: forbid building non-utf8 paths
Summary:
The world has moved on to utf-8 as the default encoding for files and data, but
EdenFS still accepts non utf-8 filenames to be written to it. In fact, most of
the time when a non utf-8 file is written to the working copy, and even though
EdenFS handles it properly, Mercurial ends up freaking out and crashing. In all
of these cases, non-utf8 files were not intentional, and thus refusing to create
them wouldn't be a loss of functionality.

Note that this diff makes the assumption that Mercurial's manifest only accepts
utf8 paths, and thus we only have to protect against files being created in the
working copy that aren't utf8.

The unfortunate part of this diff is that it makes importing trees a bit more
expensive, as testing that a path is valid utf8 is not free.

Reviewed By: chadaustin

Differential Revision: D25442975

fbshipit-source-id: 89341a004272736a61639751da43c2e9c673d5b3
2021-02-23 11:35:12 -08:00

232 lines
7.7 KiB
C++

/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/fs/config/CheckoutConfig.h"
#include <cpptoml.h> // @manual=fbsource//third-party/cpptoml:cpptoml
#include <folly/Range.h>
#include <folly/String.h>
#include <folly/io/Cursor.h>
#include <folly/io/IOBuf.h>
#include <folly/json.h>
#include "eden/fs/utils/FileUtils.h"
#include "eden/fs/utils/PathMap.h"
using folly::ByteRange;
using folly::IOBuf;
using folly::StringPiece;
namespace {

// ---------------------------------------------------------------------------
// Well-known file names. Those used by the functions in this file are
// resolved relative to the client directory (or, for the client directory
// map, relative to the Eden directory).
// ---------------------------------------------------------------------------

// Per-client TOML configuration file.
const facebook::eden::RelativePathPiece kCheckoutConfig{"config.toml"};
// File recording the checkout's parent commit(s).
const facebook::eden::RelativePathPiece kSnapshotFile{"SNAPSHOT"};
// Overlay directory.
const facebook::eden::RelativePathPiece kOverlayDir{"local"};
// JSON file mapping client directories.
const facebook::eden::RelativePathPiece kClientDirectoryMap{"config.json"};

// ---------------------------------------------------------------------------
// Section and key names used in the per-client TOML configuration file.
// ---------------------------------------------------------------------------

constexpr folly::StringPiece kRepoSection{"repository"};
constexpr folly::StringPiece kRepoSourceKey{"path"};
constexpr folly::StringPiece kRepoTypeKey{"type"};
constexpr folly::StringPiece kRepoCaseSensitiveKey{"case-sensitive"};
constexpr folly::StringPiece kMountProtocol{"protocol"};
constexpr folly::StringPiece kRequireUtf8Path{"require-utf8-path"};

// ---------------------------------------------------------------------------
// Mount protocol names. The platform-specific names (and the Windows-only
// "guid" key) are only defined on the platform that uses them;
// kMountProtocolDefault is the value assumed when the config file does not
// name a protocol.
// ---------------------------------------------------------------------------

#ifdef _WIN32
constexpr folly::StringPiece kRepoGuid{"guid"};
constexpr folly::StringPiece kMountProtocolPrjfs{"prjfs"};
constexpr folly::StringPiece kMountProtocolDefault{kMountProtocolPrjfs};
#else
constexpr folly::StringPiece kMountProtocolFuse{"fuse"};
constexpr folly::StringPiece kMountProtocolDefault{kMountProtocolFuse};
#endif
constexpr folly::StringPiece kMountProtocolNFS{"nfs"};

// ---------------------------------------------------------------------------
// SNAPSHOT file layout:
//   - 4 byte identifier: "eden"
//   - 4 byte format version number (big endian)
//   - 20 byte commit ID
//   - (Optional 20 byte commit ID, only present when there are 2 parents)
// ---------------------------------------------------------------------------

constexpr folly::StringPiece kSnapshotFileMagic{"eden"};
enum : uint32_t {
  // Identifier plus version number, in bytes.
  kSnapshotHeaderSize = 8,
  // The only format version this file reads and writes.
  kSnapshotFormatVersion = 1,
};

} // namespace
namespace facebook {
namespace eden {
// Creates a CheckoutConfig for the given mount path and client directory.
//
// The two paths are stored in mountPath_ and clientDirectory_. The
// constructor body is empty: no files are read here, and the remaining
// settings are filled in by loadFromClientDirectory().
CheckoutConfig::CheckoutConfig(
AbsolutePathPiece mountPath,
AbsolutePathPiece clientDirectory)
: clientDirectory_(clientDirectory), mountPath_(mountPath) {}
// Reads the SNAPSHOT file in the client directory and returns the parent
// commit(s) recorded in it.
//
// Two layouts are understood:
//   - the current binary layout: "eden" magic, big-endian 32-bit version,
//     then one or two raw commit IDs;
//   - the legacy layout: a single commit ID as an ASCII hexadecimal string.
//
// Throws std::runtime_error if a file carrying the magic is truncated, has an
// unsupported version, or has a payload that is not exactly one or two commit
// IDs long. Failures from readFile() surface through Try::value().
ParentCommits CheckoutConfig::getParentCommits() const {
  const auto snapshotPath = getSnapshotPath();
  const auto fileData = readFile(snapshotPath).value();
  const StringPiece snapshot{fileData};

  const bool hasMagic = snapshot.startsWith(kSnapshotFileMagic);
  if (!hasMagic) {
    // Legacy SNAPSHOT file: nothing but one commit ID written as ASCII hex,
    // possibly surrounded by whitespace.
    //
    // TODO: In the not-too-distant future we can drop support for this
    // format and throw here whenever the identifier bytes are missing.
    const auto legacyCommitId = folly::trimWhitespace(snapshot);
    return ParentCommits{Hash{legacyCommitId}};
  }

  // The magic matched; make sure the version field is present as well.
  if (snapshot.size() < kSnapshotHeaderSize) {
    throw std::runtime_error(folly::sformat(
        "eden SNAPSHOT file is too short ({} bytes): {}",
        snapshot.size(),
        snapshotPath));
  }

  // Wrap the file contents (no copy) so they can be decoded with a Cursor.
  IOBuf wrapped(IOBuf::WRAP_BUFFER, ByteRange{snapshot});
  folly::io::Cursor reader(&wrapped);
  reader += kSnapshotFileMagic.size();

  const uint32_t formatVersion = reader.readBE<uint32_t>();
  if (formatVersion != kSnapshotFormatVersion) {
    throw std::runtime_error(folly::sformat(
        "unsupported eden SNAPSHOT file format (version {}): {}",
        formatVersion,
        snapshotPath));
  }

  // Whatever follows the header must be exactly one or two commit IDs.
  const auto payloadSize = reader.length();
  const bool holdsOneParent = payloadSize == Hash::RAW_SIZE;
  const bool holdsTwoParents = payloadSize == (Hash::RAW_SIZE * 2);
  if (!(holdsOneParent || holdsTwoParents)) {
    throw std::runtime_error(folly::sformat(
        "unexpected length for eden SNAPSHOT file ({} bytes): {}",
        snapshot.size(),
        snapshotPath));
  }

  ParentCommits result;
  reader.pull(result.parent1().mutableBytes().data(), Hash::RAW_SIZE);
  if (reader.isAtEnd()) {
    // Only one parent was recorded.
    return result;
  }

  // Remaining bytes hold the second parent.
  result.parent2() = Hash{};
  reader.pull(result.parent2()->mutableBytes().data(), Hash::RAW_SIZE);
  return result;
}
void CheckoutConfig::setParentCommits(const ParentCommits& parents) const {
std::array<uint8_t, kSnapshotHeaderSize + (2 * Hash::RAW_SIZE)> buffer;
IOBuf buf(IOBuf::WRAP_BUFFER, ByteRange{buffer});
folly::io::RWPrivateCursor cursor{&buf};
// Snapshot file format:
// 4-byte identifier: "eden"
cursor.push(ByteRange{kSnapshotFileMagic});
// 4-byte format version identifier
cursor.writeBE<uint32_t>(kSnapshotFormatVersion);
// 20-byte commit ID: parent1
cursor.push(parents.parent1().getBytes());
// Optional 20-byte commit ID: parent2
if (parents.parent2().has_value()) {
cursor.push(parents.parent2()->getBytes());
XCHECK(cursor.isAtEnd());
}
size_t writtenSize = cursor - folly::io::RWPrivateCursor{&buf};
ByteRange snapshotData{buffer.data(), writtenSize};
writeFileAtomic(getSnapshotPath(), snapshotData).value();
}
// Convenience overload: bundles the two hashes into a ParentCommits value and
// forwards to setParentCommits(const ParentCommits&).
void CheckoutConfig::setParentCommits(Hash parent1, std::optional<Hash> parent2)
    const {
  const ParentCommits bundled{parent1, parent2};
  setParentCommits(bundled);
}
// Returns a reference to the stored client directory path (clientDirectory_),
// which is set by the constructor.
const AbsolutePath& CheckoutConfig::getClientDirectory() const {
return clientDirectory_;
}
// Returns the stored case-sensitivity flag (caseSensitive_).
// loadFromClientDirectory() sets it from the optional "case-sensitive" key,
// falling back to kPathMapDefaultCaseSensitive when the key is absent.
bool CheckoutConfig::getCaseSensitive() const {
return caseSensitive_;
}
// Returns the path of the SNAPSHOT file: the client directory joined with
// kSnapshotFile ("SNAPSHOT").
AbsolutePath CheckoutConfig::getSnapshotPath() const {
return clientDirectory_ + kSnapshotFile;
}
// Returns the path of the overlay directory: the client directory joined with
// kOverlayDir ("local").
AbsolutePath CheckoutConfig::getOverlayPath() const {
return clientDirectory_ + kOverlayDir;
}
// Builds a CheckoutConfig from the "config.toml" file in clientDirectory.
//
// Values read from the [repository] table:
//   - "type" and "path": repository type and source.
//     NOTE(review): a missing key is not diagnosed here; confirm what
//     dereferencing an empty cpptoml option yields before relying on it.
//   - "protocol" (optional): "nfs" selects MountProtocol::NFS; any other
//     value, or no value, selects the platform protocol (PRJFS on Windows,
//     FUSE elsewhere).
//   - "case-sensitive" (optional): defaults to kPathMapDefaultCaseSensitive.
//   - "require-utf8-path" (optional): defaults to true.
//   - "guid" (optional, Windows only): a fresh Guid is generated when absent.
//
// Throws std::runtime_error if the file has no [repository] table. Errors
// raised by cpptoml::parse_file() propagate to the caller unchanged.
std::unique_ptr<CheckoutConfig> CheckoutConfig::loadFromClientDirectory(
    AbsolutePathPiece mountPath,
    AbsolutePathPiece clientDirectory) {
  // Extract repository name from the client config file
  auto configPath = clientDirectory + kCheckoutConfig;
  auto configRoot = cpptoml::parse_file(configPath.c_str());

  // Construct CheckoutConfig object
  auto config = std::make_unique<CheckoutConfig>(mountPath, clientDirectory);

  // Load repository information. get_table() returns a null pointer when the
  // section is absent (or is not a table), so check before dereferencing it.
  auto repository = configRoot->get_table(kRepoSection.str());
  if (!repository) {
    throw std::runtime_error(folly::sformat(
        "checkout config is missing the [{}] section: {}",
        kRepoSection,
        configPath));
  }
  config->repoType_ = *repository->get_as<std::string>(kRepoTypeKey.str());
  config->repoSource_ = *repository->get_as<std::string>(kRepoSourceKey.str());

  // Only "nfs" is matched explicitly; everything else maps to the platform's
  // native protocol.
  auto mountProtocol = repository->get_as<std::string>(kMountProtocol.str())
                           .value_or(kMountProtocolDefault);
  config->mountProtocol_ = mountProtocol == kMountProtocolNFS
      ? MountProtocol::NFS
      : (folly::kIsWindows ? MountProtocol::PRJFS : MountProtocol::FUSE);

  // Read optional case-sensitivity.
  auto caseSensitive = repository->get_as<bool>(kRepoCaseSensitiveKey.str());
  config->caseSensitive_ =
      caseSensitive ? *caseSensitive : kPathMapDefaultCaseSensitive;

  // Read optional UTF-8 path requirement; enabled unless explicitly disabled.
  auto requireUtf8Path = repository->get_as<bool>(kRequireUtf8Path.str());
  config->requireUtf8Path_ = requireUtf8Path ? *requireUtf8Path : true;

#ifdef _WIN32
  // Use the configured GUID when present, otherwise generate one.
  auto guid = repository->get_as<std::string>(kRepoGuid.str());
  config->repoGuid_ = guid ? Guid{*guid} : Guid::generate();
#endif

  return config;
}
// Loads the client directory map stored as "config.json" under edenDir.
//
// Comments are stripped from the file before parsing, and trailing commas are
// tolerated. If nothing is left after stripping comments, an empty JSON
// object is returned instead of attempting to parse. Failures from readFile()
// surface through Try::value().
folly::dynamic CheckoutConfig::loadClientDirectoryMap(
    AbsolutePathPiece edenDir) {
  const auto mapPath = edenDir + kClientDirectoryMap;
  const auto rawJson = readFile(mapPath).value();
  const auto strippedJson = folly::json::stripComments(rawJson);

  if (!strippedJson.empty()) {
    // Lenient parse: a trailing comma after the last element is accepted.
    folly::json::serialization_opts parseOptions;
    parseOptions.allow_trailing_comma = true;
    return folly::parseJson(strippedJson, parseOptions);
  }

  // Nothing but comments (or nothing at all): treat as an empty map.
  return folly::dynamic::object();
}
} // namespace eden
} // namespace facebook