Mirror of https://github.com/moses-smt/mosesdecoder.git
Memory efficiency: make the reserveSize argument non-optional in Moses::Phrase's constructor.

The default used to be ARRAY_SIZE_INCR = 10, which is excessive in many cases. Where the default was used, I've set the exact size where that was obvious and explicitly used ARRAY_SIZE_INCR otherwise. If you know the code involved, it's probably worth reviewing.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3908 1f5c12ca-751b-0410-a591-d2e778427230
commit 75709a6c87 (parent a5a860ad1f)
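Before the diff, a minimal sketch of the calling pattern this change enforces. Phrase, the Input/Output factor directions and ARRAY_SIZE_INCR are the Moses names touched below; the include path, the helper functions and their names are illustrative assumptions and not part of the commit.

// Sketch only: assumes the Moses header declaring Phrase, Input/Output and
// ARRAY_SIZE_INCR is on the include path (moses/src in this era of the tree).
#include "Phrase.h"

using namespace Moses;

// Hypothetical helpers illustrating the before/after calling convention.
Phrase MakeTargetPhrase(size_t knownSize)
{
  // Before: Phrase p(Output);   // silently reserved ARRAY_SIZE_INCR = 10 words
  // After:  the second argument is mandatory.
  Phrase p(Output, knownSize);   // reserve exactly what is needed when the size is known
  return p;
}

Phrase MakeScratchPhrase()
{
  // Keep the old growth behaviour where the final size is not known up front.
  return Phrase(Output, ARRAY_SIZE_INCR);
}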
@@ -86,7 +86,7 @@ int main(int argc, char** argv)
   if(use_context) {
     c_mask.push_back(0);
   }
-  Phrase e(Output),f(Input),c(Output);
+  Phrase e(Output, 0),f(Input, 0),c(Output, 0);
   e.CreateFromString(e_mask, query_e, "|");
   f.CreateFromString(f_mask, query_f, "|");
   c.CreateFromString(c_mask, query_c,"|");
@@ -269,7 +269,7 @@ void IOWrapper::OutputBestHypo(const MosesChart::Hypothesis *hypo, long translat
   if (StaticData::Instance().IsPathRecoveryEnabled()) {
     out << "||| ";
   }
-  Phrase outPhrase(Output);
+  Phrase outPhrase(Output, ARRAY_SIZE_INCR);
   hypo->CreateOutputPhrase(outPhrase);

   // delete 1st & last
@@ -107,7 +107,7 @@ void Hypothesis::CreateOutputPhrase(Phrase &outPhrase) const

 Phrase Hypothesis::GetOutputPhrase() const
 {
-  Phrase outPhrase(Output);
+  Phrase outPhrase(Output, ARRAY_SIZE_INCR);
   CreateOutputPhrase(outPhrase);
   return outPhrase;
 }
@@ -219,7 +219,7 @@ void Hypothesis::CalcLMScore()

   m_scoreBreakdown.ZeroAllLM(lmList);

-  Phrase outPhrase(Output); // = GetOutputPhrase();
+  Phrase outPhrase(Output, ARRAY_SIZE_INCR); // = GetOutputPhrase();
   bool calcNow = false, firstPhrase = true;

   for (size_t targetPhrasePos = 0; targetPhrasePos < GetCurrTargetPhrase().GetSize(); ++targetPhrasePos) {
@@ -190,7 +190,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Moses::Word &sourc
     // modify the starting bitmap
   }

-  Phrase* m_unksrc = new Phrase(Input);
+  Phrase* m_unksrc = new Phrase(Input, 1);
   m_unksrc->AddWord() = sourceWord;
   m_unksrcs.push_back(m_unksrc);

@@ -98,7 +98,7 @@ TrellisNode::~TrellisNode()
 Moses::Phrase TrellisNode::GetOutputPhrase() const
 {
   // exactly like same fn in hypothesis, but use trellis nodes instead of prevHypos pointer
-  Moses::Phrase ret(Moses::Output);
+  Moses::Phrase ret(Moses::Output, Moses::ARRAY_SIZE_INCR);

   const Moses::Phrase &currTargetPhrase = m_hypo->GetCurrTargetPhrase();
   for (size_t pos = 0; pos < currTargetPhrase.GetSize(); ++pos) {
@@ -36,7 +36,7 @@ void extract_ngrams(const vector<Word >& sentence, map < Phrase, int > & allngr
 {
   for (int k = 0; k < (int)bleu_order; k++) {
     for(int i =0; i < max((int)sentence.size()-k,0); i++) {
-      Phrase ngram(Output);
+      Phrase ngram(Output, k+1);
       for ( int j = i; j<= i+k; j++) {
         ngram.AddWord(sentence[j]);
       }
@@ -402,7 +402,7 @@ const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & inco
   for (size_t start = 0; start < currPhrase.GetSize(); ++start) {
     for (size_t end = start; end < start + bleu_order; ++end) {
       if (end < currPhrase.GetSize()) {
-        Phrase edgeNgram(Output);
+        Phrase edgeNgram(Output, end-start+1);
         for (size_t index = start; index <= end; ++index) {
           edgeNgram.AddWord(currPhrase.GetWord(index));
         }
@@ -433,8 +433,8 @@ const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & inco
     cerr << "edgeInNgram: " << edgeIncomingNgram << endl;
   }

-  Phrase edgeSuffix(Output);
-  Phrase ngramSuffix(Output);
+  Phrase edgeSuffix(Output, ARRAY_SIZE_INCR);
+  Phrase ngramSuffix(Output, ARRAY_SIZE_INCR);
   GetPhraseSuffix(edgeWords,back,edgeSuffix);
   GetPhraseSuffix(edgeIncomingNgram,back,ngramSuffix);

@@ -182,7 +182,7 @@ int BilingualDynSuffixArray::LoadCorpus(InputFileStream& corpus, const FactorLis
   const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
   while(getline(corpus, line)) {
     sntArray.push_back(sntIdx);
-    Phrase phrase(direction);
+    Phrase phrase(direction, ARRAY_SIZE_INCR);
     // parse phrase
     phrase.CreateFromString( factors, line, factorDelimiter);
     // store words in vocabulary and corpus
@@ -438,7 +438,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
   const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
   const unsigned oldSrcCrpSize = m_srcCorpus->size(), oldTrgCrpSize = m_trgCorpus->size();
   cerr << "old source corpus size = " << oldSrcCrpSize << "\told target size = " << oldTrgCrpSize << endl;
-  Phrase sphrase(Input);
+  Phrase sphrase(Input, ARRAY_SIZE_INCR);
   sphrase.CreateFromString(m_inputFactors, source, factorDelimiter);
   m_srcVocab->MakeOpen();
   // store words in vocabulary and corpus
@@ -449,7 +449,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
   }
   m_srcSntBreaks.push_back(oldSrcCrpSize); // former end of corpus is index of new sentence
   m_srcVocab->MakeClosed();
-  Phrase tphrase(Output);
+  Phrase tphrase(Output, ARRAY_SIZE_INCR);
   tphrase.CreateFromString(m_outputFactors, target, factorDelimiter);
   m_trgVocab->MakeOpen();
   for(size_t i = 0; i < tphrase.GetSize(); ++i) {
@@ -69,7 +69,7 @@ LexicalReordering::~LexicalReordering()

 Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
 {
-  return m_table->GetScore(f, e, Phrase(Output));
+  return m_table->GetScore(f, e, Phrase(Output, ARRAY_SIZE_INCR));
 }

 FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
@@ -571,8 +571,8 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f)
   if(m_FactorsE.empty()) {
     //f is all of key...
     Candidates cands;
-    m_Table->GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
-    m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
+    m_Table->GetCandidates(MakeTableKey(f,Phrase(Output, ARRAY_SIZE_INCR)),&cands);
+    m_Cache[MakeCacheKey(f,Phrase(Output, ARRAY_SIZE_INCR))] = cands;
   } else {
     ObjectPool<PPimp> pool;
     PPimp* pPos = m_Table->GetRoot();
@@ -236,11 +236,11 @@ public:
   std::vector<float> scores;
   Phrase src;

-  State() : range(0,0),scores(0),src(Input) {}
+  State() : range(0,0),scores(0),src(Input, ARRAY_SIZE_INCR) {}
   State(Position b,Position e,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
-    : ptr(v),range(b,e),scores(sv),src(Input) {}
+    : ptr(v),range(b,e),scores(sv),src(Input, ARRAY_SIZE_INCR) {}
   State(Range const& r,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
-    : ptr(v),range(r),scores(sv),src(Input) {}
+    : ptr(v),range(r),scores(sv),src(Input, ARRAY_SIZE_INCR) {}

   Position begin() const {
     return range.first;
@@ -104,7 +104,7 @@ void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& fac

 Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
 {
-  Phrase retPhrase(m_direction);
+  Phrase retPhrase(m_direction, wordsRange.GetNumWordsCovered());

   for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++) {
     Word &word = retPhrase.AddWord();
@@ -116,7 +116,6 @@ Phrase Phrase::GetSubString(const WordsRange &wordsRange) const

 std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
 {
-  Phrase retPhrase(m_direction);
   stringstream strme;
   for (size_t pos = 0 ; pos < GetSize() ; pos++) {
     strme << GetWord(pos).GetString(factorsToPrint, (pos != GetSize()-1));
@@ -189,6 +188,8 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
 {
   FactorCollection &factorCollection = FactorCollection::Instance();

+  m_words.reserve(phraseVector.size());
+
   for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++) {
     // add word this phrase
     Word &word = AddWord();
@@ -222,6 +223,8 @@ void Phrase::CreateFromStringNewFormat(FactorDirection direction
   // to
   // "KOMMA|none" "ART|Def.Z" "NN|Neut.NotGen.Sg" "VVFIN|none"

+  m_words.reserve(annotatedWordVector.size()-1);
+
   for (size_t phrasePos = 0 ; phrasePos < annotatedWordVector.size() - 1 ; phrasePos++) {
     string &annotatedWord = annotatedWordVector[phrasePos];
     bool isNonTerminal;
@@ -58,7 +58,7 @@ public:
   /** create empty phrase
   * \param direction = language (Input = Source, Output = Target)
   */
-  Phrase(FactorDirection direction, size_t reserveSize = ARRAY_SIZE_INCR);
+  Phrase(FactorDirection direction, size_t reserveSize);
   /** create phrase from vectors of words */
   Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);

@@ -105,7 +105,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
   }

   // source
-  Phrase sourcePhrase(Input);
+  Phrase sourcePhrase(Input, 0);
   sourcePhrase.CreateFromString( input, phraseVector);
   //target
   TargetPhrase targetPhrase(Output);
@@ -148,7 +148,7 @@ bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
   Word sourceLHS, targetLHS;

   // source
-  Phrase sourcePhrase(Input);
+  Phrase sourcePhrase(Input, 0);
   sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);

   // create target phrase obj
@@ -34,7 +34,7 @@ namespace Moses
 {

 Sentence::Sentence(FactorDirection direction)
-  : Phrase(direction)
+  : Phrase(direction, 0)
   , InputType()
 {
   assert(direction == Input);
@@ -395,12 +395,12 @@ bool StaticData::LoadData(Parameter *parameter)

   if (vecStr.size() == 1) {
     sentenceID++;
-    Phrase phrase(Output);
+    Phrase phrase(Output, 0);
     phrase.CreateFromString(GetOutputFactorOrder(), vecStr[0], GetFactorDelimiter());
     m_constraints.insert(make_pair(sentenceID,phrase));
   } else if (vecStr.size() == 2) {
     sentenceID = Scan<long>(vecStr[0]);
-    Phrase phrase(Output);
+    Phrase phrase(Output, 0);
     phrase.CreateFromString(GetOutputFactorOrder(), vecStr[1], GetFactorDelimiter());
     m_constraints.insert(make_pair(sentenceID,phrase));
   } else {
@@ -43,7 +43,7 @@ bool TargetPhrase::printalign=StaticData::Instance().PrintAlignmentInfo();
 //bool TargetPhrase::printalign;

 TargetPhrase::TargetPhrase(FactorDirection direction, std::string out_string)
-  :Phrase(direction),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
+  :Phrase(direction, 0),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
 {

   //ACAT
@@ -55,7 +55,7 @@ TargetPhrase::TargetPhrase(FactorDirection direction, std::string out_string)


 TargetPhrase::TargetPhrase(FactorDirection direction)
-  :Phrase(direction)
+  :Phrase(direction, ARRAY_SIZE_INCR)
   , m_transScore(0.0)
   , m_ngramScore(0.0)
   , m_fullScore(0.0)
@@ -84,7 +84,7 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
 TranslationOption::TranslationOption(const TranslationOption &copy)
   : m_targetPhrase(copy.m_targetPhrase)
   //, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
-  , m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
+  , m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input, ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
   , m_sourceWordsRange(copy.m_sourceWordsRange)
   , m_futureScore(copy.m_futureScore)
   , m_scoreBreakdown(copy.m_scoreBreakdown)
@@ -94,7 +94,7 @@ TranslationOption::TranslationOption(const TranslationOption &copy)
 TranslationOption::TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange)
   : m_targetPhrase(copy.m_targetPhrase)
   //, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
-  , m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
+  , m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input, ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
   , m_sourceWordsRange(sourceWordsRange)
   , m_futureScore(copy.m_futureScore)
   , m_scoreBreakdown(copy.m_scoreBreakdown)
@@ -220,7 +220,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
     // modify the starting bitmap
   }

-  Phrase* m_unksrc = new Phrase(Input);
+  Phrase* m_unksrc = new Phrase(Input, 1);
   m_unksrc->AddWord() = sourceWord;
   m_unksrcs.push_back(m_unksrc);

@@ -156,7 +156,7 @@ void TrellisPath::CreateDeviantPaths(TrellisPathList &pathColl) const

 Phrase TrellisPath::GetTargetPhrase() const
 {
-  Phrase targetPhrase(Output);
+  Phrase targetPhrase(Output, ARRAY_SIZE_INCR);

   int numHypo = (int) m_path.size();
   for (int node = numHypo - 2 ; node >= 0 ; --node) {
@@ -174,7 +174,7 @@ Phrase TrellisPath::GetSurfacePhrase() const
 {
   const std::vector<FactorType> &outputFactor = StaticData::Instance().GetOutputFactorOrder();
   Phrase targetPhrase = GetTargetPhrase()
-    ,ret(Output);
+    ,ret(Output, targetPhrase.GetSize());

   for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) {
     Word &newWord = ret.AddWord();