remove absl/random and absl/memory, add absl::btree_set

This commit is contained in:
Taku Kudo 2024-01-07 10:48:48 +00:00
parent adf9e81b63
commit f5c736302c
15 changed files with 103 additions and 211 deletions

View File

@ -17,11 +17,11 @@
#include <cstdint>
#include <limits>
#include <set>
#include <string>
#include <vector>
#include "sentencepiece_model.pb.h"
#include "third_party/absl/container/btree_set.h"
#include "third_party/absl/container/flat_hash_map.h"
#include "trainer_interface.h"
@ -51,7 +51,7 @@ class Trainer : public TrainerInterface {
// Position list. Use set so that we can keep the order of occurrence.
// See EncodePos/DecodePos.
std::set<uint64_t> positions;
absl::btree_set<uint64_t> positions;
bool IsBigram() const { return left != nullptr && right != nullptr; }
std::string ToString() const;
@ -72,8 +72,7 @@ class Trainer : public TrainerInterface {
CHECK_LE(l, std::numeric_limits<uint16_t>::max());
CHECK_LE(r, std::numeric_limits<uint16_t>::max());
const uint64_t n = (static_cast<uint64_t>(sid) << 32) |
(static_cast<uint64_t>(l) << 16) |
r;
(static_cast<uint64_t>(l) << 16) | r;
return n;
}
@ -118,7 +117,7 @@ class Trainer : public TrainerInterface {
absl::flat_hash_map<uint64_t, Symbol *> symbols_cache_;
// Set of symbols from which we find the best symbol in each iteration.
std::set<Symbol *> active_symbols_;
absl::btree_set<Symbol *> active_symbols_;
// Stores symbols allocated on the heap so that we can delete them all at once.
std::vector<Symbol *> allocated_;

View File

@ -18,7 +18,6 @@
#include <iostream>
#include <memory>
#include "third_party/absl/memory/memory.h"
#include "util.h"
#if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE)
@ -105,12 +104,12 @@ using DefaultWritableFile = PosixWritableFile;
std::unique_ptr<ReadableFile> NewReadableFile(absl::string_view filename,
bool is_binary) {
return absl::make_unique<DefaultReadableFile>(filename, is_binary);
return std::make_unique<DefaultReadableFile>(filename, is_binary);
}
std::unique_ptr<WritableFile> NewWritableFile(absl::string_view filename,
bool is_binary) {
return absl::make_unique<DefaultWritableFile>(filename, is_binary);
return std::make_unique<DefaultWritableFile>(filename, is_binary);
}
} // namespace filesystem

View File

@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "model_factory.h"
#include "bpe_model.h"
#include "char_model.h"
#include "model_factory.h"
#include "third_party/absl/memory/memory.h"
#include "unigram_model.h"
#include "word_model.h"
@ -28,16 +28,16 @@ std::unique_ptr<ModelInterface> ModelFactory::Create(
switch (trainer_spec.model_type()) {
case TrainerSpec::UNIGRAM:
return absl::make_unique<unigram::Model>(model_proto);
return std::make_unique<unigram::Model>(model_proto);
break;
case TrainerSpec::BPE:
return absl::make_unique<bpe::Model>(model_proto);
return std::make_unique<bpe::Model>(model_proto);
break;
case TrainerSpec::WORD:
return absl::make_unique<word::Model>(model_proto);
return std::make_unique<word::Model>(model_proto);
break;
case TrainerSpec::CHAR:
return absl::make_unique<character::Model>(model_proto);
return std::make_unique<character::Model>(model_proto);
break;
default:
LOG(ERROR) << "Unknown model_type: " << trainer_spec.model_type();
@ -45,6 +45,6 @@ std::unique_ptr<ModelInterface> ModelFactory::Create(
break;
}
return absl::make_unique<unigram::Model>(model_proto);
return std::make_unique<unigram::Model>(model_proto);
}
} // namespace sentencepiece

View File

@ -17,7 +17,6 @@
#include <algorithm>
#include "sentencepiece_model.pb.h"
#include "third_party/absl/memory/memory.h"
#include "third_party/absl/strings/str_format.h"
#include "util.h"
@ -148,7 +147,7 @@ void ModelInterface::InitializePieces() {
}
}
matcher_ = absl::make_unique<normalizer::PrefixMatcher>(user_defined_symbols);
matcher_ = std::make_unique<normalizer::PrefixMatcher>(user_defined_symbols);
}
std::vector<absl::string_view> SplitIntoWords(absl::string_view text,

View File

@ -18,7 +18,6 @@
#include <vector>
#include "common.h"
#include "third_party/absl/memory/memory.h"
#include "third_party/absl/strings/match.h"
#include "third_party/absl/strings/string_view.h"
#include "third_party/absl/strings/strip.h"
@ -58,7 +57,7 @@ void Normalizer::Init() {
if (!status_.ok()) return;
// Reads the body of double array.
trie_ = absl::make_unique<Darts::DoubleArray>();
trie_ = std::make_unique<Darts::DoubleArray>();
// The second arg of set_array is not the size of blob,
// but the number of double array units.
@ -314,7 +313,7 @@ PrefixMatcher::PrefixMatcher(const std::set<absl::string_view> &dic) {
std::vector<const char *> key;
key.reserve(dic.size());
for (const auto &it : dic) key.push_back(it.data());
trie_ = absl::make_unique<Darts::DoubleArray>();
trie_ = std::make_unique<Darts::DoubleArray>();
if (trie_->build(key.size(), const_cast<char **>(&key[0]), nullptr,
nullptr) != 0) {
LOG(ERROR) << "Failed to build the TRIE for PrefixMatcher";

View File

@ -30,7 +30,6 @@
#include "model_interface.h"
#include "normalizer.h"
#include "sentencepiece.pb.h"
#include "third_party/absl/memory/memory.h"
#include "third_party/absl/strings/numbers.h"
#include "third_party/absl/strings/str_cat.h"
#include "third_party/absl/strings/str_join.h"
@ -217,7 +216,7 @@ SentencePieceProcessor::SentencePieceProcessor() {}
SentencePieceProcessor::~SentencePieceProcessor() {}
util::Status SentencePieceProcessor::Load(absl::string_view filename) {
auto model_proto = absl::make_unique<ModelProto>();
auto model_proto = std::make_unique<ModelProto>();
RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get()));
return Load(std::move(model_proto));
}
@ -227,14 +226,14 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) {
}
util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
auto model_proto_copy = absl::make_unique<ModelProto>();
auto model_proto_copy = std::make_unique<ModelProto>();
*model_proto_copy = model_proto;
return Load(std::move(model_proto_copy));
}
util::Status SentencePieceProcessor::LoadFromSerializedProto(
absl::string_view serialized) {
auto model_proto = absl::make_unique<ModelProto>();
auto model_proto = std::make_unique<ModelProto>();
CHECK_OR_RETURN(
model_proto->ParseFromArray(serialized.data(), serialized.size()));
return Load(std::move(model_proto));
@ -244,11 +243,11 @@ util::Status SentencePieceProcessor::Load(
std::unique_ptr<ModelProto> model_proto) {
model_proto_ = std::move(model_proto);
model_ = ModelFactory::Create(*model_proto_);
normalizer_ = absl::make_unique<normalizer::Normalizer>(
normalizer_ = std::make_unique<normalizer::Normalizer>(
model_proto_->normalizer_spec(), model_proto_->trainer_spec());
if (model_proto_->has_denormalizer_spec() &&
!model_proto_->denormalizer_spec().precompiled_charsmap().empty()) {
denormalizer_ = absl::make_unique<normalizer::Normalizer>(
denormalizer_ = std::make_unique<normalizer::Normalizer>(
model_proto_->denormalizer_spec());
}

View File

@ -25,7 +25,6 @@
#include "sentencepiece_trainer.h"
#include "testharness.h"
#include "third_party/absl/container/flat_hash_map.h"
#include "third_party/absl/memory/memory.h"
#include "third_party/absl/strings/str_cat.h"
#include "third_party/absl/strings/string_view.h"
#include "util.h"
@ -123,7 +122,7 @@ NormalizerSpec MakeDefaultNormalizerSpec() {
TEST(SentencepieceProcessorTest, StatusTest) {
SentencePieceProcessor sp;
EXPECT_FALSE(sp.status().ok());
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
sp.SetModel(std::move(mock));
EXPECT_FALSE(sp.status().ok());
}
@ -135,7 +134,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
const auto normalization_spec = MakeDefaultNormalizerSpec();
{
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {
{WS "ABC", 3}, {WS "DE", 4}, {"F", 0}, {"</s>", 2}};
@ -143,7 +142,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
std::vector<std::string> output;
EXPECT_TRUE(sp.Encode("ABC DEF", &output).ok());
@ -186,7 +185,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
// Unknown sequences.
{
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {
{WS "ABC", 3}, {WS "D", 4}, {"E", 0}, {"F", 0}, {"</s>", 2}};
@ -196,7 +195,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
mock->SetEncodeResult(kInput, result);
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
std::vector<std::string> output;
EXPECT_TRUE(sp.Encode("ABC DEF", &output).ok());
@ -236,7 +235,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
// Byte-fallback.
{
const absl::string_view kInput2 = WS "ABC" WS "DEFあ";
auto mock = absl::make_unique<ByteFallbackMockModel>();
auto mock = std::make_unique<ByteFallbackMockModel>();
const EncodeResult result = {{WS "ABC", 3}, {WS "D", 4}, {"E", 0},
{"F", 0}, {"", 0}, {"</s>", 2}};
@ -250,7 +249,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
mock->SetEncodeResult(kInput2, result);
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
std::vector<std::string> output;
EXPECT_TRUE(sp.Encode("ABC DEFあ", &output).ok());
@ -306,12 +305,12 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
// Crash if
// ModelInterface::Encode() returns shorter results.
{
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {{WS "ABC", 3}};
mock->SetEncodeResult(kInput, result);
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
// Expects crash.
EXPECT_FALSE(sp.Encode("ABC DEF", &spt).ok());
@ -320,13 +319,13 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
// Crash if
// ModelInterface::Encode() returns longer results.
{
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {
{WS "ABC", 3}, {WS "DE", 4}, {"F", 5}, {"G", 6}};
mock->SetEncodeResult(kInput, result);
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
// Expects crash.
EXPECT_FALSE(sp.Encode("ABC DEF", &spt).ok());
@ -335,13 +334,13 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
// Crash if
// ModelInterface::Encode() returns an empty piece.
{
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {
{WS "ABC", 3}, {WS "DE", 4}, {"", 5}, {"F", 6}};
mock->SetEncodeResult(kInput, result);
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
// Expects crash.
EXPECT_FALSE(sp.Encode("ABC DEF", &spt).ok());
@ -349,7 +348,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
// Halfwidth to fullwidth katakana normalization.
{
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {{WS "グー", 3}, {"グル", 4}, {"</s>", 2}};
const absl::string_view input = WS "グーグル";
mock->SetEncodeResult(input, result);
@ -383,7 +382,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) {
// One to many normalization.
{
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {{WS "株式", 3}, {"会社", 4}, {"</s>", 2}};
const absl::string_view input = WS "株式会社";
mock->SetEncodeResult(input, result);
@ -422,7 +421,7 @@ TEST(SentencepieceProcessorTest, NBestEncodeTest) {
const auto normalization_spec = MakeDefaultNormalizerSpec();
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const NBestEncodeResult result = {
{{{WS "ABC", 3}, {WS "DE", 4}, {"F", 0}, {"</s>", 2}},
@ -433,7 +432,7 @@ TEST(SentencepieceProcessorTest, NBestEncodeTest) {
mock->SetNBestEncodeResult(kInput, result);
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
std::vector<std::vector<std::string>> output;
EXPECT_TRUE(sp.NBestEncode("ABC DEF", 2, &output).ok());
@ -464,7 +463,7 @@ TEST(SentencepieceProcessorTest, NBestEncodeTest) {
spt2.ParseFromString(sp.NBestEncodeAsSerializedProto("ABC DEF", 2)));
EXPECT_EQ(spt.SerializeAsString(), spt2.SerializeAsString());
auto mock_empty = absl::make_unique<MockModel>();
auto mock_empty = std::make_unique<MockModel>();
mock_empty->SetNBestEncodeResult(kInput, {});
sp.SetModel(std::move(mock_empty));
EXPECT_FALSE(sp.NBestEncode("ABC DEF", 2, &output).ok());
@ -476,7 +475,7 @@ TEST(SentencepieceProcessorTest, SampleEncodeTest) {
const auto normalization_spec = MakeDefaultNormalizerSpec();
auto mock = absl::make_unique<MockModel>();
auto mock = std::make_unique<MockModel>();
const EncodeResult result = {
{WS "ABC", 3}, {WS "DE", 4}, {"F", 0}, {"</s>", 2}};
@ -490,7 +489,7 @@ TEST(SentencepieceProcessorTest, SampleEncodeTest) {
mock->SetEncodeResult(kInput, result);
sp.SetModel(std::move(mock));
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
std::vector<std::string> output;
EXPECT_TRUE(sp.SampleEncode("ABC DEF", -1, 0.5, &output).ok());
@ -536,7 +535,7 @@ TEST(SentencepieceProcessorTest, SampleEncodeTest) {
const float prob = 1.0 * freq[0] / (freq[0] + freq[1]);
EXPECT_NEAR(prob, expected_prob, 0.05);
auto mock_empty = absl::make_unique<MockModel>();
auto mock_empty = std::make_unique<MockModel>();
mock_empty->SetNBestEncodeResult(kInput, {});
sp.SetModel(std::move(mock_empty));
EXPECT_FALSE(sp.SampleEncode("ABC DEF", 10, 0.5, &output).ok());
@ -578,12 +577,12 @@ TEST(SentencepieceProcessorTest, DecodeTest) {
{
SentencePieceProcessor sp;
auto mock = absl::make_unique<DecodeMockModel>();
auto mock = std::make_unique<DecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
@ -629,15 +628,15 @@ TEST(SentencepieceProcessorTest, DecodeTest) {
// unk_surface is not defined.
{
SentencePieceProcessor sp;
auto proto = absl::make_unique<ModelProto>();
auto proto = std::make_unique<ModelProto>();
sp.Load(std::move(proto)).IgnoreError();
auto mock = absl::make_unique<DecodeMockModel>();
auto mock = std::make_unique<DecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
@ -648,16 +647,16 @@ TEST(SentencepieceProcessorTest, DecodeTest) {
{
SentencePieceProcessor sp;
auto proto = absl::make_unique<ModelProto>();
auto proto = std::make_unique<ModelProto>();
proto->mutable_trainer_spec()->set_unk_surface("");
sp.Load(std::move(proto)).IgnoreError();
auto mock = absl::make_unique<DecodeMockModel>();
auto mock = std::make_unique<DecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
@ -668,16 +667,16 @@ TEST(SentencepieceProcessorTest, DecodeTest) {
{
SentencePieceProcessor sp;
auto proto = absl::make_unique<ModelProto>();
auto proto = std::make_unique<ModelProto>();
proto->mutable_trainer_spec()->set_unk_surface("<UNK>");
sp.Load(std::move(proto)).IgnoreError();
auto mock = absl::make_unique<DecodeMockModel>();
auto mock = std::make_unique<DecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
@ -688,18 +687,18 @@ TEST(SentencepieceProcessorTest, DecodeTest) {
{
SentencePieceProcessor sp;
auto proto = absl::make_unique<ModelProto>();
auto proto = std::make_unique<ModelProto>();
proto->mutable_trainer_spec()->set_unk_surface("");
proto->mutable_normalizer_spec()->set_add_dummy_prefix(false);
proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(false);
sp.Load(std::move(proto)).IgnoreError();
auto mock = absl::make_unique<DecodeMockModel>();
auto mock = std::make_unique<DecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
@ -746,18 +745,18 @@ TEST(SentencepieceProcessorTest, DummyPrefixDecodeTest) {
{
SentencePieceProcessor sp;
auto proto = absl::make_unique<ModelProto>();
auto proto = std::make_unique<ModelProto>();
proto->mutable_trainer_spec()->set_unk_surface("");
proto->mutable_normalizer_spec()->set_add_dummy_prefix(true);
proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(false);
sp.Load(std::move(proto)).IgnoreError();
auto mock = absl::make_unique<DecodeMockModel>();
auto mock = std::make_unique<DecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
@ -768,18 +767,18 @@ TEST(SentencepieceProcessorTest, DummyPrefixDecodeTest) {
{
SentencePieceProcessor sp;
auto proto = absl::make_unique<ModelProto>();
auto proto = std::make_unique<ModelProto>();
proto->mutable_trainer_spec()->set_unk_surface("");
proto->mutable_normalizer_spec()->set_add_dummy_prefix(true);
proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(true);
sp.Load(std::move(proto)).IgnoreError();
auto mock = absl::make_unique<DecodeMockModel>();
auto mock = std::make_unique<DecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
SentencePieceText spt;
@ -833,12 +832,12 @@ TEST(SentencepieceProcessorTest, ByteFallbackDecodeTest) {
};
SentencePieceProcessor sp;
auto mock = absl::make_unique<ByteFallbackDecodeMockModel>();
auto mock = std::make_unique<ByteFallbackDecodeMockModel>();
sp.SetModel(std::move(mock));
const auto normalization_spec = MakeDefaultNormalizerSpec();
sp.SetNormalizer(
absl::make_unique<normalizer::Normalizer>(normalization_spec));
std::make_unique<normalizer::Normalizer>(normalization_spec));
{
const std::vector<std::string> input = {
@ -1347,7 +1346,7 @@ TEST(SentencePieceProcessorTest, EndToEndTest) {
// Moves ModelProto.
{
SentencePieceProcessor sp;
auto moved = absl::make_unique<ModelProto>();
auto moved = std::make_unique<ModelProto>();
const ModelProto *moved_ptr = moved.get();
*moved = model_proto;
EXPECT_TRUE(sp.Load(std::move(moved)).ok());

View File

@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "trainer_factory.h"
#include "bpe_model_trainer.h"
#include "char_model_trainer.h"
#include "third_party/absl/memory/memory.h"
#include "trainer_factory.h"
#include "unigram_model_trainer.h"
#include "word_model_trainer.h"
@ -27,27 +27,27 @@ std::unique_ptr<TrainerInterface> TrainerFactory::Create(
const NormalizerSpec &denormalizer_spec) {
switch (trainer_spec.model_type()) {
case TrainerSpec::UNIGRAM:
return absl::make_unique<unigram::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
return std::make_unique<unigram::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
break;
case TrainerSpec::BPE:
return absl::make_unique<bpe::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
return std::make_unique<bpe::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
break;
case TrainerSpec::WORD:
return absl::make_unique<word::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
return std::make_unique<word::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
break;
case TrainerSpec::CHAR:
return absl::make_unique<character::Trainer>(
trainer_spec, normalizer_spec, denormalizer_spec);
return std::make_unique<character::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
break;
default:
LOG(FATAL) << "Unknown model_type: " << trainer_spec.model_type();
break;
}
return absl::make_unique<unigram::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
return std::make_unique<unigram::Trainer>(trainer_spec, normalizer_spec,
denormalizer_spec);
}
} // namespace sentencepiece

View File

@ -29,9 +29,6 @@
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
#include "third_party/absl/container/flat_hash_map.h"
#include "third_party/absl/memory/memory.h"
#include "third_party/absl/random/distributions.h"
#include "third_party/absl/random/random.h"
#include "third_party/absl/strings/numbers.h"
#include "third_party/absl/strings/str_cat.h"
#include "third_party/absl/strings/str_format.h"
@ -107,7 +104,7 @@ class SentenceSelector {
if (spec_->input_sentence_size() > 0) {
if (spec_->shuffle_input_sentence()) {
constexpr size_t kSeed = 12345678;
sampler_ = absl::make_unique<Sampler>(
sampler_ = std::make_unique<Sampler>(
sentences, spec_->input_sentence_size(), kSeed);
} else {
LOG(INFO)
@ -303,12 +300,12 @@ bool TrainerInterface::IsValidSentencePiece(
}
template <typename T>
void AddDPNoise(const TrainerSpec &trainer_spec,
random::SharedBitGen &generator, T *to_update) {
void AddDPNoise(const TrainerSpec &trainer_spec, std::mt19937 *generator,
T *to_update) {
if (trainer_spec.differential_privacy_noise_level() > 0) {
float random_num = absl::Gaussian<float>(
generator, 0, trainer_spec.differential_privacy_noise_level());
std::normal_distribution<float> dist(
0.0f, trainer_spec.differential_privacy_noise_level());
const float random_num = dist(*generator);
*to_update =
std::round(std::max(0.f, random_num + static_cast<float>(*to_update)));
}
@ -351,7 +348,7 @@ util::Status TrainerInterface::LoadSentences() {
LOG(INFO) << "SentenceIterator is not specified. Using "
"MultiFileSentenceIterator.";
sentence_iterator_impl =
absl::make_unique<MultiFileSentenceIterator>(std::vector<std::string>(
std::make_unique<MultiFileSentenceIterator>(std::vector<std::string>(
trainer_spec_.input().begin(), trainer_spec_.input().end()));
sentence_iterator_ = sentence_iterator_impl.get();
}
@ -428,7 +425,7 @@ END:
LOG(INFO) << "Normalizing sentences...";
CHECK_OR_RETURN(!sentences_.empty());
{
auto pool = absl::make_unique<ThreadPool>(trainer_spec_.num_threads());
auto pool = std::make_unique<ThreadPool>(trainer_spec_.num_threads());
pool->StartWorkers();
for (int n = 0; n < trainer_spec_.num_threads(); ++n) {
pool->Schedule([&, n]() {
@ -475,12 +472,12 @@ END:
std::min<uint64>(trainer_spec_.num_threads(), sentences_.size() - 1);
{
auto pool = absl::make_unique<ThreadPool>(num_workers);
auto pool = std::make_unique<ThreadPool>(num_workers);
pool->StartWorkers();
for (int n = 0; n < num_workers; ++n) {
pool->Schedule([&, n]() {
// One per thread generator.
random::SharedBitGen generator;
auto *generator = random::GetRandomGenerator();
for (size_t i = n; i < sentences_.size(); i += num_workers) {
AddDPNoise<int64>(trainer_spec_, generator,
&(sentences_[i].second));

View File

@ -25,7 +25,6 @@
#include <vector>
#include "third_party/absl/container/flat_hash_map.h"
#include "third_party/absl/memory/memory.h"
#include "third_party/absl/strings/str_split.h"
#include "third_party/absl/strings/string_view.h"
#include "util.h"
@ -626,7 +625,7 @@ void Model::BuildTrie(std::vector<std::pair<absl::string_view, int>> *pieces) {
value[i] = (*pieces)[i].second; // vocab_id
}
trie_ = absl::make_unique<Darts::DoubleArray>();
trie_ = std::make_unique<Darts::DoubleArray>();
if (trie_->build(key.size(), const_cast<char **>(&key[0]), nullptr,
&value[0]) != 0) {
status_ = util::InternalError("cannot build double-array.");

View File

@ -28,7 +28,6 @@
#include "pretokenizer_for_training.h"
#include "sentencepiece_trainer.h"
#include "third_party/absl/container/flat_hash_map.h"
#include "third_party/absl/memory/memory.h"
#include "third_party/absl/strings/str_replace.h"
#include "third_party/absl/strings/str_split.h"
#include "third_party/esaxx/esa.hxx" // Suffix array library.
@ -283,7 +282,7 @@ std::vector<float> Trainer::RunEStep(const TrainerModel &model, float *obj,
std::vector<float> objs(trainer_spec_.num_threads(), 0.0);
std::vector<int64> ntokens(trainer_spec_.num_threads(), 0.0);
auto pool = absl::make_unique<ThreadPool>(trainer_spec_.num_threads());
auto pool = std::make_unique<ThreadPool>(trainer_spec_.num_threads());
pool->StartWorkers();
int64 all_sentence_freq = 0;
@ -405,7 +404,7 @@ TrainerModel::SentencePieces Trainer::PruneSentencePieces(
std::vector<std::vector<std::vector<int>>> inverteds(
trainer_spec_.num_threads());
auto pool = absl::make_unique<ThreadPool>(trainer_spec_.num_threads());
auto pool = std::make_unique<ThreadPool>(trainer_spec_.num_threads());
pool->StartWorkers();
for (int n = 0; n < trainer_spec_.num_threads(); ++n) {
freqs[n].resize(sentencepieces.size(), 0.0);

View File

@ -288,11 +288,6 @@ namespace random {
std::mt19937 *GetRandomGenerator();
class SharedBitGen {
public:
std::mt19937 *engine() { return GetRandomGenerator(); }
};
template <typename T>
class ReservoirSampler {
public:

View File

@ -12,7 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CONTAINER_RANDOM_H_
#define ABSL_CONTAINER_RANDOM_H_
#ifndef ABSL_CONTAINER_BTREE_SET_
#define ABSL_CONTAINER_BTREE_SET_
#endif // ABSL_CONTAINER_RANDOM_H_
#include <set>
namespace absl {
// Alias template: absl::btree_set<T, Compare, Allocator> is exactly
// std::set<T, Compare, Allocator>. Element ordering and iterator behaviour are
// therefore those of std::set. The defaults (std::less<T>, std::allocator<T>)
// are the same ones std::set itself uses.
template <typename T, typename Compare = std::less<T>,
typename Allocator = std::allocator<T>>
using btree_set = std::set<T, Compare, Allocator>;
}
#endif // ABSL_CONTAINER_BTREE_SET_

View File

@ -1,71 +0,0 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: memory.h
// -----------------------------------------------------------------------------
//
// This file contains a minimal definition of `absl::make_unique`, an early
// implementation of the C++14 `std::make_unique` helper that constructs an
// object (or an array of unknown bound) owned by a `std::unique_ptr`.
//
// This `absl::make_unique` is designed to be a drop-in replacement for
// `std::make_unique`, so that the eventual switchover is a simple renaming
// operation.
#ifndef ABSL_MEMORY_MEMORY_H_
#define ABSL_MEMORY_MEMORY_H_
#include <memory>
namespace absl {
// Trait to select overloads and return types for MakeUnique.
template <typename T>
struct MakeUniqueResult {
using scalar = std::unique_ptr<T>;
};
template <typename T>
struct MakeUniqueResult<T[]> {
using array = std::unique_ptr<T[]>;
};
template <typename T, size_t N>
struct MakeUniqueResult<T[N]> {
using invalid = void;
};
// MakeUnique<T>(...) is an early implementation of C++14 std::make_unique.
// It is designed to be 100% compatible with std::make_unique so that the
// eventual switchover will be a simple renaming operation.
template <typename T, typename... Args>
typename MakeUniqueResult<T>::scalar make_unique(Args &&... args) { // NOLINT
return std::unique_ptr<T>(
new T(std::forward<Args>(args)...)); // NOLINT(build/c++11)
}
// Overload for array of unknown bound.
// The allocation of arrays needs to use the array form of new,
// and cannot take element constructor arguments.
template <typename T>
typename MakeUniqueResult<T>::array make_unique(size_t n) {
return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
}
// Reject arrays of known bound.
template <typename T, typename... Args>
typename MakeUniqueResult<T>::invalid make_unique(Args &&... /* args */) =
delete; // NOLINT
} // namespace absl
#endif // ABSL_MEMORY_MEMORY_H_

View File

@ -1,31 +0,0 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_CONTAINER_DISTRIBUTIONS_H_
#define ABSL_CONTAINER_DISTRIBUTIONS_H_
#include <random>
#include "random.h"
namespace absl {
template <typename T, typename G>
T Gaussian(G &generator, T mean, T stddev) {
std::normal_distribution<> dist(mean, stddev);
return dist(*generator.engine());
}
} // namespace absl
#endif // ABSL_CONTAINER_DISTRIBUTIONS_H_