Slightly less wasteful OnDiskPt word conversion

Ideally, OnDiskPt would convert its entire vocabulary to Word in advance.
This commit is contained in:
Kenneth Heafield 2012-09-26 12:52:11 +01:00
parent a9c890e55e
commit ab60d1ad6f
4 changed files with 24 additions and 23 deletions

View File

@ -207,9 +207,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
--phraseSize;
for (size_t pos = 0; pos < phraseSize; ++pos) {
Moses::Word *mosesWord = GetWord(pos).ConvertToMoses(Moses::Output, outputFactors, vocab);
ret->AddWord(*mosesWord);
delete mosesWord;
GetWord(pos).ConvertToMoses(outputFactors, vocab, ret->AddWord());
}
// scores
@ -223,9 +221,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
}
ret->SetAlignmentInfo(alignmentInfo);
Moses::Word *lhs = GetWord(GetSize() - 1).ConvertToMoses(Moses::Output, outputFactors, vocab);
ret->SetTargetLHS(*lhs);
delete lhs;
GetWord(GetSize() - 1).ConvertToMoses(outputFactors, vocab, ret->MutableTargetLHS());
return ret;
}

View File

@ -23,6 +23,9 @@
#include "../moses/src/Word.h"
#include "Word.h"
#include "util/tokenize_piece.hh"
#include "util/exception.hh"
using namespace std;
namespace OnDiskPt
@ -94,23 +97,21 @@ size_t Word::ReadFromFile(std::fstream &file)
return memUsed;
}
Moses::Word *Word::ConvertToMoses(Moses::FactorDirection direction
, const std::vector<Moses::FactorType> &outputFactorsVec
, const Vocab &vocab) const
{
// Converts this on-disk word into a Moses::Word, writing the result into
// `overwrite` rather than returning a newly allocated object.
//   outputFactorsVec: factor types to assign, one per '|'-separated token, in order.
//   vocab:            supplies the string looked up by m_vocabId.
//   overwrite:        reset to Moses::Word(m_isNonTerminal), then filled factor by factor.
// Raises util::Exception through UTIL_THROW_IF when the number of tokens does
// not match outputFactorsVec.size() (see the two messages below).
// NOTE(review): this hunk is rendered without +/- markers, so statements that
// use `ret`, `toks` and `direction` look like leftovers of the previous
// pointer-returning implementation interleaved with the new code -- confirm
// against the real file before editing.
void Word::ConvertToMoses(
const std::vector<Moses::FactorType> &outputFactorsVec,
const Vocab &vocab,
Moses::Word &overwrite) const {
Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
Moses::Word *ret = new Moses::Word(m_isNonTerminal);
// Start from a clean word that carries only the non-terminal flag.
overwrite = Moses::Word(m_isNonTerminal);
const string &str = vocab.GetString(m_vocabId);
vector<string> toks = Moses::Tokenize(str, "|");
for (size_t ind = 0; ind < toks.size(); ++ind) {
Moses::FactorType factorType = outputFactorsVec[ind];
const Moses::Factor *factor = factorColl.AddFactor(direction, factorType, toks[ind]);
ret->SetFactor(factorType, factor);
// TODO: this conversion should have been done at load time.
// Walk the vocabulary string token by token, splitting on '|'.
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
// Advance the factor-type iterator and the token iterator in lockstep.
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
// Tokens ran out before every requested factor type was filled.
UTIL_THROW_IF(!tok, util::Exception, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
overwrite.SetFactor(*t, factorColl.AddFactor(*tok));
}
return ret;
// Tokens are still left after all requested factor types were consumed.
UTIL_THROW_IF(tok, util::Exception, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
int Word::Compare(const Word &compare) const

View File

@ -70,9 +70,10 @@ public:
m_vocabId = vocabId;
}
Moses::Word *ConvertToMoses(Moses::FactorDirection direction
, const std::vector<Moses::FactorType> &outputFactorsVec
, const Vocab &vocab) const;
// Fills `overwrite` with the Moses::Word equivalent of this word.
//   outputFactorsVec: factor types to assign to the converted word.
//   vocab:            vocabulary used for the conversion.
//   overwrite:        destination word, written through the reference; nothing is returned.
void ConvertToMoses(
const std::vector<Moses::FactorType> &outputFactorsVec,
const Vocab &vocab,
Moses::Word &overwrite) const;
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;

View File

@ -149,6 +149,9 @@ public:
// Replaces the stored target-side LHS word (m_lhsTarget) with a copy of `lhs`.
void SetTargetLHS(const Word &lhs) {
m_lhsTarget = lhs;
}
// Returns a writable reference to the stored target-side LHS word
// (m_lhsTarget), so a caller can fill it in place.
Word &MutableTargetLHS() {
return m_lhsTarget;
}
// Returns a read-only reference to the stored target-side LHS word (m_lhsTarget).
const Word &GetTargetLHS() const {
return m_lhsTarget;
}