Merged PR 20839: Do not ignore ignoreEOS for spm decoding

With final space, this eliminates trailing whitespace caused by appending EOS.
This commit is contained in:
Marcin Junczys-Dowmunt 2021-09-28 17:17:12 +00:00
parent aa58ba8e23
commit d796a3c3b7

View File

@ -236,18 +236,20 @@ public:
return words;
}
std::string decode(const Words& sentence, bool /*ignoreEOS*/) const override {
std::string decode(const Words& sentence, bool ignoreEOS) const override {
std::string line;
if(keepEncoded_) { // i.e. keep the sentence segmented into subword units
for(const Word& id : sentence)
line += (*this)[id] + " ";
if(!ignoreEOS || id != getEosId())
line += (*this)[id] + " ";
line.pop_back(); // trim the trailing whitespace
} else {
// convert vector of Word to vector of int
std::vector<int> spmSentence;
spmSentence.reserve(sentence.size());
for(auto&& word : sentence)
spmSentence.push_back(word.toWordIndex());
if(!ignoreEOS || word != getEosId())
spmSentence.push_back(word.toWordIndex());
spm_->Decode(spmSentence, &line);
}
return line;