diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h index 2879d29..15ca479 100644 --- a/src/bpe_model_trainer.h +++ b/src/bpe_model_trainer.h @@ -17,11 +17,11 @@ #include #include -#include #include #include #include "sentencepiece_model.pb.h" +#include "third_party/absl/container/btree_set.h" #include "third_party/absl/container/flat_hash_map.h" #include "trainer_interface.h" @@ -51,7 +51,7 @@ class Trainer : public TrainerInterface { // Position list. Use set so that we can keep the order of occurrence. // See EncodePos/DecodePos. - std::set positions; + absl::btree_set positions; bool IsBigram() const { return left != nullptr && right != nullptr; } std::string ToString() const; @@ -72,8 +72,7 @@ class Trainer : public TrainerInterface { CHECK_LE(l, std::numeric_limits::max()); CHECK_LE(r, std::numeric_limits::max()); const uint64_t n = (static_cast(sid) << 32) | - (static_cast(l) << 16) | - r; + (static_cast(l) << 16) | r; return n; } @@ -118,7 +117,7 @@ class Trainer : public TrainerInterface { absl::flat_hash_map symbols_cache_; // Set of symbols from which we find the best symbol in each iteration. - std::set active_symbols_; + absl::btree_set active_symbols_; // Stores symbols allocated in heap so that we can delete them at onece. std::vector allocated_; diff --git a/src/filesystem.cc b/src/filesystem.cc index ce1ac73..d8cb7a3 100644 --- a/src/filesystem.cc +++ b/src/filesystem.cc @@ -18,7 +18,6 @@ #include #include -#include "third_party/absl/memory/memory.h" #include "util.h" #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE) @@ -105,12 +104,12 @@ using DefaultWritableFile = PosixWritableFile; std::unique_ptr NewReadableFile(absl::string_view filename, bool is_binary) { - return absl::make_unique(filename, is_binary); + return std::make_unique(filename, is_binary); } std::unique_ptr NewWritableFile(absl::string_view filename, bool is_binary) { - return absl::make_unique(filename, is_binary); + return std::make_unique(filename, is_binary); } } // namespace filesystem diff --git a/src/model_factory.cc b/src/model_factory.cc index be99501..5987445 100644 --- a/src/model_factory.cc +++ b/src/model_factory.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "model_factory.h" + #include "bpe_model.h" #include "char_model.h" -#include "model_factory.h" -#include "third_party/absl/memory/memory.h" #include "unigram_model.h" #include "word_model.h" @@ -28,16 +28,16 @@ std::unique_ptr ModelFactory::Create( switch (trainer_spec.model_type()) { case TrainerSpec::UNIGRAM: - return absl::make_unique(model_proto); + return std::make_unique(model_proto); break; case TrainerSpec::BPE: - return absl::make_unique(model_proto); + return std::make_unique(model_proto); break; case TrainerSpec::WORD: - return absl::make_unique(model_proto); + return std::make_unique(model_proto); break; case TrainerSpec::CHAR: - return absl::make_unique(model_proto); + return std::make_unique(model_proto); break; default: LOG(ERROR) << "Unknown model_type: " << trainer_spec.model_type(); @@ -45,6 +45,6 @@ std::unique_ptr ModelFactory::Create( break; } - return absl::make_unique(model_proto); + return std::make_unique(model_proto); } } // namespace sentencepiece diff --git a/src/model_interface.cc b/src/model_interface.cc index c8e1e2e..bb52f9a 100644 --- a/src/model_interface.cc +++ b/src/model_interface.cc @@ -17,7 +17,6 @@ #include #include "sentencepiece_model.pb.h" -#include "third_party/absl/memory/memory.h" #include "third_party/absl/strings/str_format.h" #include "util.h" @@ -148,7 +147,7 @@ void ModelInterface::InitializePieces() { } } - matcher_ = absl::make_unique(user_defined_symbols); + matcher_ = std::make_unique(user_defined_symbols); } std::vector SplitIntoWords(absl::string_view text, diff --git a/src/normalizer.cc b/src/normalizer.cc index 0e406a6..b50e867 100644 --- a/src/normalizer.cc +++ b/src/normalizer.cc @@ -18,7 +18,6 @@ #include #include "common.h" -#include "third_party/absl/memory/memory.h" #include "third_party/absl/strings/match.h" #include "third_party/absl/strings/string_view.h" #include "third_party/absl/strings/strip.h" @@ -58,7 +57,7 @@ void Normalizer::Init() { if (!status_.ok()) return; // Reads the body of double array. - trie_ = absl::make_unique(); + trie_ = std::make_unique(); // The second arg of set_array is not the size of blob, // but the number of double array units. @@ -314,7 +313,7 @@ PrefixMatcher::PrefixMatcher(const std::set &dic) { std::vector key; key.reserve(dic.size()); for (const auto &it : dic) key.push_back(it.data()); - trie_ = absl::make_unique(); + trie_ = std::make_unique(); if (trie_->build(key.size(), const_cast(&key[0]), nullptr, nullptr) != 0) { LOG(ERROR) << "Failed to build the TRIE for PrefixMatcher"; diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc index 2545ab4..7b1951f 100644 --- a/src/sentencepiece_processor.cc +++ b/src/sentencepiece_processor.cc @@ -30,7 +30,6 @@ #include "model_interface.h" #include "normalizer.h" #include "sentencepiece.pb.h" -#include "third_party/absl/memory/memory.h" #include "third_party/absl/strings/numbers.h" #include "third_party/absl/strings/str_cat.h" #include "third_party/absl/strings/str_join.h" @@ -217,7 +216,7 @@ SentencePieceProcessor::SentencePieceProcessor() {} SentencePieceProcessor::~SentencePieceProcessor() {} util::Status SentencePieceProcessor::Load(absl::string_view filename) { - auto model_proto = absl::make_unique(); + auto model_proto = std::make_unique(); RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get())); return Load(std::move(model_proto)); } @@ -227,14 +226,14 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) { } util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) { - auto model_proto_copy = absl::make_unique(); + auto model_proto_copy = std::make_unique(); *model_proto_copy = model_proto; return Load(std::move(model_proto_copy)); } util::Status SentencePieceProcessor::LoadFromSerializedProto( absl::string_view serialized) { - auto model_proto = absl::make_unique(); + auto model_proto = std::make_unique(); CHECK_OR_RETURN( model_proto->ParseFromArray(serialized.data(), serialized.size())); return Load(std::move(model_proto)); @@ -244,11 +243,11 @@ util::Status SentencePieceProcessor::Load( std::unique_ptr model_proto) { model_proto_ = std::move(model_proto); model_ = ModelFactory::Create(*model_proto_); - normalizer_ = absl::make_unique( + normalizer_ = std::make_unique( model_proto_->normalizer_spec(), model_proto_->trainer_spec()); if (model_proto_->has_denormalizer_spec() && !model_proto_->denormalizer_spec().precompiled_charsmap().empty()) { - denormalizer_ = absl::make_unique( + denormalizer_ = std::make_unique( model_proto_->denormalizer_spec()); } diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc index f05dc5d..0f00515 100644 --- a/src/sentencepiece_processor_test.cc +++ b/src/sentencepiece_processor_test.cc @@ -25,7 +25,6 @@ #include "sentencepiece_trainer.h" #include "testharness.h" #include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/memory/memory.h" #include "third_party/absl/strings/str_cat.h" #include "third_party/absl/strings/string_view.h" #include "util.h" @@ -123,7 +122,7 @@ NormalizerSpec MakeDefaultNormalizerSpec() { TEST(SentencepieceProcessorTest, StatusTest) { SentencePieceProcessor sp; EXPECT_FALSE(sp.status().ok()); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); EXPECT_FALSE(sp.status().ok()); } @@ -135,7 +134,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { const auto normalization_spec = MakeDefaultNormalizerSpec(); { - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = { {WS "ABC", 3}, {WS "DE", 4}, {"F", 0}, {"", 2}}; @@ -143,7 +142,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); std::vector output; EXPECT_TRUE(sp.Encode("ABC DEF", &output).ok()); @@ -186,7 +185,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { // Unknown sequences. { - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = { {WS "ABC", 3}, {WS "D", 4}, {"E", 0}, {"F", 0}, {"", 2}}; @@ -196,7 +195,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { mock->SetEncodeResult(kInput, result); sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); std::vector output; EXPECT_TRUE(sp.Encode("ABC DEF", &output).ok()); @@ -236,7 +235,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { // Byte-fallback. { const absl::string_view kInput2 = WS "ABC" WS "DEFあ"; - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = {{WS "ABC", 3}, {WS "D", 4}, {"E", 0}, {"F", 0}, {"あ", 0}, {"", 2}}; @@ -250,7 +249,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { mock->SetEncodeResult(kInput2, result); sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); std::vector output; EXPECT_TRUE(sp.Encode("ABC DEFあ", &output).ok()); @@ -306,12 +305,12 @@ TEST(SentencepieceProcessorTest, EncodeTest) { // Crash if // ModelInterface::Encode() returns shorter results. { - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = {{WS "ABC", 3}}; mock->SetEncodeResult(kInput, result); sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; // Expects crash. EXPECT_FALSE(sp.Encode("ABC DEF", &spt).ok()); @@ -320,13 +319,13 @@ TEST(SentencepieceProcessorTest, EncodeTest) { // Crash if // ModelInterface::Encode() returns longer results. { - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = { {WS "ABC", 3}, {WS "DE", 4}, {"F", 5}, {"G", 6}}; mock->SetEncodeResult(kInput, result); sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; // Expects crash. EXPECT_FALSE(sp.Encode("ABC DEF", &spt).ok()); @@ -335,13 +334,13 @@ TEST(SentencepieceProcessorTest, EncodeTest) { // Crash if // ModelInterface::Encode() returns an empty piece. { - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = { {WS "ABC", 3}, {WS "DE", 4}, {"", 5}, {"F", 6}}; mock->SetEncodeResult(kInput, result); sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; // Expects crash. EXPECT_FALSE(sp.Encode("ABC DEF", &spt).ok()); @@ -349,7 +348,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { // Halfwidth to Fullwidith katakana normalization. { - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = {{WS "グー", 3}, {"グル", 4}, {"", 2}}; const absl::string_view input = WS "グーグル"; mock->SetEncodeResult(input, result); @@ -383,7 +382,7 @@ TEST(SentencepieceProcessorTest, EncodeTest) { // One to many normalization. { - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = {{WS "株式", 3}, {"会社", 4}, {"", 2}}; const absl::string_view input = WS "株式会社"; mock->SetEncodeResult(input, result); @@ -422,7 +421,7 @@ TEST(SentencepieceProcessorTest, NBestEncodeTest) { const auto normalization_spec = MakeDefaultNormalizerSpec(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const NBestEncodeResult result = { {{{WS "ABC", 3}, {WS "DE", 4}, {"F", 0}, {"", 2}}, @@ -433,7 +432,7 @@ TEST(SentencepieceProcessorTest, NBestEncodeTest) { mock->SetNBestEncodeResult(kInput, result); sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); std::vector> output; EXPECT_TRUE(sp.NBestEncode("ABC DEF", 2, &output).ok()); @@ -464,7 +463,7 @@ TEST(SentencepieceProcessorTest, NBestEncodeTest) { spt2.ParseFromString(sp.NBestEncodeAsSerializedProto("ABC DEF", 2))); EXPECT_EQ(spt.SerializeAsString(), spt2.SerializeAsString()); - auto mock_empty = absl::make_unique(); + auto mock_empty = std::make_unique(); mock_empty->SetNBestEncodeResult(kInput, {}); sp.SetModel(std::move(mock_empty)); EXPECT_FALSE(sp.NBestEncode("ABC DEF", 2, &output).ok()); @@ -476,7 +475,7 @@ TEST(SentencepieceProcessorTest, SampleEncodeTest) { const auto normalization_spec = MakeDefaultNormalizerSpec(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); const EncodeResult result = { {WS "ABC", 3}, {WS "DE", 4}, {"F", 0}, {"", 2}}; @@ -490,7 +489,7 @@ TEST(SentencepieceProcessorTest, SampleEncodeTest) { mock->SetEncodeResult(kInput, result); sp.SetModel(std::move(mock)); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); std::vector output; EXPECT_TRUE(sp.SampleEncode("ABC DEF", -1, 0.5, &output).ok()); @@ -536,7 +535,7 @@ TEST(SentencepieceProcessorTest, SampleEncodeTest) { const float prob = 1.0 * freq[0] / (freq[0] + freq[1]); EXPECT_NEAR(prob, expected_prob, 0.05); - auto mock_empty = absl::make_unique(); + auto mock_empty = std::make_unique(); mock_empty->SetNBestEncodeResult(kInput, {}); sp.SetModel(std::move(mock_empty)); EXPECT_FALSE(sp.SampleEncode("ABC DEF", 10, 0.5, &output).ok()); @@ -578,12 +577,12 @@ TEST(SentencepieceProcessorTest, DecodeTest) { { SentencePieceProcessor sp; - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; @@ -629,15 +628,15 @@ TEST(SentencepieceProcessorTest, DecodeTest) { // unk_surface is not defined. { SentencePieceProcessor sp; - auto proto = absl::make_unique(); + auto proto = std::make_unique(); sp.Load(std::move(proto)).IgnoreError(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; @@ -648,16 +647,16 @@ TEST(SentencepieceProcessorTest, DecodeTest) { { SentencePieceProcessor sp; - auto proto = absl::make_unique(); + auto proto = std::make_unique(); proto->mutable_trainer_spec()->set_unk_surface(""); sp.Load(std::move(proto)).IgnoreError(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; @@ -668,16 +667,16 @@ TEST(SentencepieceProcessorTest, DecodeTest) { { SentencePieceProcessor sp; - auto proto = absl::make_unique(); + auto proto = std::make_unique(); proto->mutable_trainer_spec()->set_unk_surface(""); sp.Load(std::move(proto)).IgnoreError(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; @@ -688,18 +687,18 @@ TEST(SentencepieceProcessorTest, DecodeTest) { { SentencePieceProcessor sp; - auto proto = absl::make_unique(); + auto proto = std::make_unique(); proto->mutable_trainer_spec()->set_unk_surface(""); proto->mutable_normalizer_spec()->set_add_dummy_prefix(false); proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(false); sp.Load(std::move(proto)).IgnoreError(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; @@ -746,18 +745,18 @@ TEST(SentencepieceProcessorTest, DummyPrefixDecodeTest) { { SentencePieceProcessor sp; - auto proto = absl::make_unique(); + auto proto = std::make_unique(); proto->mutable_trainer_spec()->set_unk_surface(""); proto->mutable_normalizer_spec()->set_add_dummy_prefix(true); proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(false); sp.Load(std::move(proto)).IgnoreError(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; @@ -768,18 +767,18 @@ TEST(SentencepieceProcessorTest, DummyPrefixDecodeTest) { { SentencePieceProcessor sp; - auto proto = absl::make_unique(); + auto proto = std::make_unique(); proto->mutable_trainer_spec()->set_unk_surface(""); proto->mutable_normalizer_spec()->set_add_dummy_prefix(true); proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(true); sp.Load(std::move(proto)).IgnoreError(); - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); SentencePieceText spt; @@ -833,12 +832,12 @@ TEST(SentencepieceProcessorTest, ByteFallbackDecodeTest) { }; SentencePieceProcessor sp; - auto mock = absl::make_unique(); + auto mock = std::make_unique(); sp.SetModel(std::move(mock)); const auto normalization_spec = MakeDefaultNormalizerSpec(); sp.SetNormalizer( - absl::make_unique(normalization_spec)); + std::make_unique(normalization_spec)); { const std::vector input = { @@ -1347,7 +1346,7 @@ TEST(SentencePieceProcessorTest, EndToEndTest) { // Moves ModelProto. { SentencePieceProcessor sp; - auto moved = absl::make_unique(); + auto moved = std::make_unique(); const ModelProto *moved_ptr = moved.get(); *moved = model_proto; EXPECT_TRUE(sp.Load(std::move(moved)).ok()); diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc index d1d2541..6fe73c3 100644 --- a/src/trainer_factory.cc +++ b/src/trainer_factory.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "trainer_factory.h" + #include "bpe_model_trainer.h" #include "char_model_trainer.h" -#include "third_party/absl/memory/memory.h" -#include "trainer_factory.h" #include "unigram_model_trainer.h" #include "word_model_trainer.h" @@ -27,27 +27,27 @@ std::unique_ptr TrainerFactory::Create( const NormalizerSpec &denormalizer_spec) { switch (trainer_spec.model_type()) { case TrainerSpec::UNIGRAM: - return absl::make_unique(trainer_spec, normalizer_spec, - denormalizer_spec); + return std::make_unique(trainer_spec, normalizer_spec, + denormalizer_spec); break; case TrainerSpec::BPE: - return absl::make_unique(trainer_spec, normalizer_spec, - denormalizer_spec); + return std::make_unique(trainer_spec, normalizer_spec, + denormalizer_spec); break; case TrainerSpec::WORD: - return absl::make_unique(trainer_spec, normalizer_spec, - denormalizer_spec); + return std::make_unique(trainer_spec, normalizer_spec, + denormalizer_spec); break; case TrainerSpec::CHAR: - return absl::make_unique( - trainer_spec, normalizer_spec, denormalizer_spec); + return std::make_unique(trainer_spec, normalizer_spec, + denormalizer_spec); break; default: LOG(FATAL) << "Unknown model_type: " << trainer_spec.model_type(); break; } - return absl::make_unique(trainer_spec, normalizer_spec, - denormalizer_spec); + return std::make_unique(trainer_spec, normalizer_spec, + denormalizer_spec); } } // namespace sentencepiece diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc index 672f653..84d760c 100644 --- a/src/trainer_interface.cc +++ b/src/trainer_interface.cc @@ -29,9 +29,6 @@ #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" #include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/memory/memory.h" -#include "third_party/absl/random/distributions.h" -#include "third_party/absl/random/random.h" #include "third_party/absl/strings/numbers.h" #include "third_party/absl/strings/str_cat.h" #include "third_party/absl/strings/str_format.h" @@ -107,7 +104,7 @@ class SentenceSelector { if (spec_->input_sentence_size() > 0) { if (spec_->shuffle_input_sentence()) { constexpr size_t kSeed = 12345678; - sampler_ = absl::make_unique( + sampler_ = std::make_unique( sentences, spec_->input_sentence_size(), kSeed); } else { LOG(INFO) @@ -303,12 +300,12 @@ bool TrainerInterface::IsValidSentencePiece( } template -void AddDPNoise(const TrainerSpec &trainer_spec, - random::SharedBitGen &generator, T *to_update) { +void AddDPNoise(const TrainerSpec &trainer_spec, std::mt19937 *generator, + T *to_update) { if (trainer_spec.differential_privacy_noise_level() > 0) { - float random_num = absl::Gaussian( - generator, 0, trainer_spec.differential_privacy_noise_level()); - + std::normal_distribution dist( + 0.0f, trainer_spec.differential_privacy_noise_level()); + const float random_num = dist(*generator); *to_update = std::round(std::max(0.f, random_num + static_cast(*to_update))); } @@ -351,7 +348,7 @@ util::Status TrainerInterface::LoadSentences() { LOG(INFO) << "SentenceIterator is not specified. Using " "MultiFileSentenceIterator."; sentence_iterator_impl = - absl::make_unique(std::vector( + std::make_unique(std::vector( trainer_spec_.input().begin(), trainer_spec_.input().end())); sentence_iterator_ = sentence_iterator_impl.get(); } @@ -428,7 +425,7 @@ END: LOG(INFO) << "Normalizing sentences..."; CHECK_OR_RETURN(!sentences_.empty()); { - auto pool = absl::make_unique(trainer_spec_.num_threads()); + auto pool = std::make_unique(trainer_spec_.num_threads()); pool->StartWorkers(); for (int n = 0; n < trainer_spec_.num_threads(); ++n) { pool->Schedule([&, n]() { @@ -475,12 +472,12 @@ END: std::min(trainer_spec_.num_threads(), sentences_.size() - 1); { - auto pool = absl::make_unique(num_workers); + auto pool = std::make_unique(num_workers); pool->StartWorkers(); for (int n = 0; n < num_workers; ++n) { pool->Schedule([&, n]() { // One per thread generator. - random::SharedBitGen generator; + auto *generator = random::GetRandomGenerator(); for (size_t i = n; i < sentences_.size(); i += num_workers) { AddDPNoise(trainer_spec_, generator, &(sentences_[i].second)); diff --git a/src/unigram_model.cc b/src/unigram_model.cc index d9f1ce9..13f15c8 100644 --- a/src/unigram_model.cc +++ b/src/unigram_model.cc @@ -25,7 +25,6 @@ #include #include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/memory/memory.h" #include "third_party/absl/strings/str_split.h" #include "third_party/absl/strings/string_view.h" #include "util.h" @@ -626,7 +625,7 @@ void Model::BuildTrie(std::vector> *pieces) { value[i] = (*pieces)[i].second; // vocab_id } - trie_ = absl::make_unique(); + trie_ = std::make_unique(); if (trie_->build(key.size(), const_cast(&key[0]), nullptr, &value[0]) != 0) { status_ = util::InternalError("cannot build double-array."); diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc index d58c408..d921c06 100644 --- a/src/unigram_model_trainer.cc +++ b/src/unigram_model_trainer.cc @@ -28,7 +28,6 @@ #include "pretokenizer_for_training.h" #include "sentencepiece_trainer.h" #include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/memory/memory.h" #include "third_party/absl/strings/str_replace.h" #include "third_party/absl/strings/str_split.h" #include "third_party/esaxx/esa.hxx" // Suffix array library. @@ -283,7 +282,7 @@ std::vector Trainer::RunEStep(const TrainerModel &model, float *obj, std::vector objs(trainer_spec_.num_threads(), 0.0); std::vector ntokens(trainer_spec_.num_threads(), 0.0); - auto pool = absl::make_unique(trainer_spec_.num_threads()); + auto pool = std::make_unique(trainer_spec_.num_threads()); pool->StartWorkers(); int64 all_sentence_freq = 0; @@ -405,7 +404,7 @@ TrainerModel::SentencePieces Trainer::PruneSentencePieces( std::vector>> inverteds( trainer_spec_.num_threads()); - auto pool = absl::make_unique(trainer_spec_.num_threads()); + auto pool = std::make_unique(trainer_spec_.num_threads()); pool->StartWorkers(); for (int n = 0; n < trainer_spec_.num_threads(); ++n) { freqs[n].resize(sentencepieces.size(), 0.0); diff --git a/src/util.h b/src/util.h index b305aa8..cd84327 100644 --- a/src/util.h +++ b/src/util.h @@ -288,11 +288,6 @@ namespace random { std::mt19937 *GetRandomGenerator(); -class SharedBitGen { - public: - std::mt19937 *engine() { return GetRandomGenerator(); } -}; - template class ReservoirSampler { public: diff --git a/third_party/absl/random/random.h b/third_party/absl/container/btree_set.h similarity index 66% rename from third_party/absl/random/random.h rename to third_party/absl/container/btree_set.h index d131d80..4695e8a 100644 --- a/third_party/absl/random/random.h +++ b/third_party/absl/container/btree_set.h @@ -12,7 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License.! -#ifndef ABSL_CONTAINER_RANDOM_H_ -#define ABSL_CONTAINER_RANDOM_H_ +#ifndef ABSL_CONTAINER_BTREE_SET_ +#define ABSL_CONTAINER_BTREE_SET_ -#endif // ABSL_CONTAINER_RANDOM_H_ +#include + +namespace absl { + +template , + typename Allocator = std::allocator> +using btree_set = std::set; + +} + +#endif // ABSL_CONTAINER_BTREE_SET_ diff --git a/third_party/absl/memory/memory.h b/third_party/absl/memory/memory.h deleted file mode 100644 index 6aaf0c9..0000000 --- a/third_party/absl/memory/memory.h +++ /dev/null @@ -1,71 +0,0 @@ -// -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// ----------------------------------------------------------------------------- -// File: string_view.h -// ----------------------------------------------------------------------------- -// -// This file contains the definition of the `absl::string_view` class. A -// `string_view` points to a contiguous span of characters, often part or all of -// another `std::string`, double-quoted std::string literal, character array, or -// even another `string_view`. -// -// This `absl::string_view` abstraction is designed to be a drop-in -// replacement for the C++17 `std::string_view` abstraction. -#ifndef ABSL_MEMORY_MEMORY_H_ -#define ABSL_MEMORY_MEMORY_H_ - -#include - -namespace absl { - -// Trait to select overloads and return types for MakeUnique. -template -struct MakeUniqueResult { - using scalar = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using array = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using invalid = void; -}; - -// MakeUnique(...) is an early implementation of C++14 std::make_unique. -// It is designed to be 100% compatible with std::make_unique so that the -// eventual switchover will be a simple renaming operation. -template -typename MakeUniqueResult::scalar make_unique(Args &&... args) { // NOLINT - return std::unique_ptr( - new T(std::forward(args)...)); // NOLINT(build/c++11) -} - -// Overload for array of unknown bound. -// The allocation of arrays needs to use the array form of new, -// and cannot take element constructor arguments. -template -typename MakeUniqueResult::array make_unique(size_t n) { - return std::unique_ptr(new typename std::remove_extent::type[n]()); -} - -// Reject arrays of known bound. -template -typename MakeUniqueResult::invalid make_unique(Args &&... /* args */) = - delete; // NOLINT - -} // namespace absl -#endif // ABSL_MEMORY_MEMORY_H_ diff --git a/third_party/absl/random/distributions.h b/third_party/absl/random/distributions.h deleted file mode 100644 index b559db9..0000000 --- a/third_party/absl/random/distributions.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2016 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.! - -#ifndef ABSL_CONTAINER_DISTRIBUTIONS_H_ -#define ABSL_CONTAINER_DISTRIBUTIONS_H_ - -#include - -#include "random.h" - -namespace absl { - -template -T Gaussian(G &generator, T mean, T stddev) { - std::normal_distribution<> dist(mean, stddev); - return dist(*generator.engine()); -} -} // namespace absl - -#endif // ABSL_CONTAINER_DISTRIBUTIONS_H_