sapling/eden/fs/utils/Utf8.cpp
Chad Austin ab3b2be7bf add some utf-8 helper functions
Summary:
Add some functions for validating and producing valid UTF-8 to be used
in an upcoming diff.

Reviewed By: fanzeyi

Differential Revision: D21890510

fbshipit-source-id: b25144a34f1df91c72e8ed776b1ee7c1d68344c8
2020-06-10 19:29:51 -07:00

45 lines
1.3 KiB
C++

/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "eden/fs/utils/Utf8.h"
#include <folly/Unicode.h>
namespace facebook {
namespace eden {
/**
 * Returns true if `str` consists entirely of well-formed UTF-8 sequences.
 *
 * An empty range is considered valid. A correctly encoded U+FFFD
 * (replacement character) in the input is accepted as valid.
 */
bool isValidUtf8(folly::ByteRange str) {
  const unsigned char* begin = str.begin();
  const unsigned char* const end = str.end();
  while (begin != end) {
    // utf8ToCodePoint advances `begin`, so remember where this sequence
    // started in case we need to look at the raw bytes again.
    const unsigned char* const start = begin;
    if (U'\ufffd' == folly::utf8ToCodePoint(begin, end, true)) {
      // With skipOnError set, U+FFFD is returned both for a malformed
      // sequence and for a replacement character that is genuinely present
      // in the input. Disambiguate by inspecting the input itself: the only
      // well-formed encoding of U+FFFD is the three bytes EF BF BD.
      const bool isEncodedReplacement = (end - start) >= 3 &&
          start[0] == 0xEF && start[1] == 0xBF && start[2] == 0xBD;
      if (!isEncodedReplacement) {
        return false;
      }
    }
  }
  return true;
}
/**
 * Decodes `str` one code point at a time and re-encodes each code point as
 * UTF-8, returning the concatenated result.
 *
 * Decoding is done with skipOnError enabled, so a malformed sequence yields
 * U+FFFD (the replacement character) instead of throwing.
 */
std::string ensureValidUtf8(folly::ByteRange str) {
  std::string sanitized;
  // Initial capacity only; the string grows on demand if the re-encoded
  // output ends up longer than the input.
  sanitized.reserve(str.size());

  const unsigned char* const last = str.end();
  // No increment expression: utf8ToCodePoint advances `cursor` itself.
  for (const unsigned char* cursor = str.begin(); cursor != last;) {
    const auto codePoint =
        folly::utf8ToCodePoint(cursor, last, /*skipOnError=*/true);
    // codePointToUtf8 hands back a std::string for at most a few bytes of
    // output; that is wasteful, but small-string optimization should keep
    // it from allocating.
    sanitized.append(folly::codePointToUtf8(codePoint));
  }
  return sanitized;
}
} // namespace eden
} // namespace facebook