mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 00:45:18 +03:00
ab3b2be7bf
Summary: Add some functions for validating and producing valid UTF-8 to be used in an upcoming diff. Reviewed By: fanzeyi Differential Revision: D21890510 fbshipit-source-id: b25144a34f1df91c72e8ed776b1ee7c1d68344c8
45 lines
1.3 KiB
C++
45 lines
1.3 KiB
C++
/*
|
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
*
|
|
* This software may be used and distributed according to the terms of the
|
|
* GNU General Public License version 2.
|
|
*/
|
|
|
|
#include "eden/fs/utils/Utf8.h"
|
|
#include <folly/Unicode.h>
|
|
|
|
namespace facebook {
|
|
namespace eden {
|
|
|
|
/**
 * Returns true if `str` consists entirely of well-formed UTF-8 sequences.
 *
 * An empty range is valid. A correctly encoded U+FFFD REPLACEMENT CHARACTER
 * (bytes EF BF BD) is accepted as valid input.
 */
bool isValidUtf8(folly::ByteRange str) {
  const unsigned char* begin = str.begin();
  const unsigned char* const end = str.end();
  while (begin != end) {
    // utf8ToCodePoint advances `begin`, so remember where this sequence
    // started in order to inspect its raw bytes afterwards.
    const unsigned char* const start = begin;
    if (U'\ufffd' == folly::utf8ToCodePoint(begin, end, true)) {
      // utf8ToCodePoint's signature means a decoding failure and a genuine
      // replacement character both come back as U+FFFD. Tell them apart by
      // checking whether the input actually contained U+FFFD's three-byte
      // encoding at this position.
      const bool isEncodedReplacementChar = (end - start) >= 3 &&
          start[0] == 0xEF && start[1] == 0xBF && start[2] == 0xBD;
      if (!isEncodedReplacementChar) {
        return false;
      }
    }
  }
  return true;
}
|
|
|
|
/**
 * Decodes `str` one code point at a time and re-encodes each code point as
 * UTF-8, returning the concatenated result.
 *
 * The decoder is invoked with its skip-on-error flag set; presumably each
 * undecodable byte then comes back as U+FFFD (see folly/Unicode.h), which is
 * what makes the re-encoded output valid.
 */
std::string ensureValidUtf8(folly::ByteRange str) {
  std::string result;
  // Start with room for the input's byte count; the string can still grow.
  result.reserve(str.size());

  const unsigned char* const last = str.end();
  for (const unsigned char* cursor = str.begin(); cursor != last;) {
    // The decoder moves `cursor` forward past whatever it consumed.
    const char32_t codePoint = folly::utf8ToCodePoint(cursor, last, true);
    // codePointToUtf8 hands back a std::string even though the encoding never
    // exceeds four bytes; small-string optimization should keep that from
    // allocating in practice.
    result.append(folly::codePointToUtf8(codePoint));
  }
  return result;
}
|
|
|
|
} // namespace eden
|
|
} // namespace facebook
|