Memory efficiency: make the reserveSize argument non-optional in
Moses::Phrase's constructor.

The default used to be ARRAY_SIZE_INCR = 10, which will be excessive in
many cases.  Where the default was used, I've set the exact size where
that was obvious and explicitly used ARRAY_SIZE_INCR otherwise.  If you
know the code involved, it's probably worth reviewing.



git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3908 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
pjwilliams 2011-02-28 11:41:08 +00:00
parent a5a860ad1f
commit 75709a6c87
20 changed files with 37 additions and 34 deletions

View File

@ -86,7 +86,7 @@ int main(int argc, char** argv)
if(use_context) {
c_mask.push_back(0);
}
Phrase e(Output),f(Input),c(Output);
Phrase e(Output, 0),f(Input, 0),c(Output, 0);
e.CreateFromString(e_mask, query_e, "|");
f.CreateFromString(f_mask, query_f, "|");
c.CreateFromString(c_mask, query_c,"|");

View File

@ -269,7 +269,7 @@ void IOWrapper::OutputBestHypo(const MosesChart::Hypothesis *hypo, long translat
if (StaticData::Instance().IsPathRecoveryEnabled()) {
out << "||| ";
}
Phrase outPhrase(Output);
Phrase outPhrase(Output, ARRAY_SIZE_INCR);
hypo->CreateOutputPhrase(outPhrase);
// delete 1st & last

View File

@ -107,7 +107,7 @@ void Hypothesis::CreateOutputPhrase(Phrase &outPhrase) const
Phrase Hypothesis::GetOutputPhrase() const
{
Phrase outPhrase(Output);
Phrase outPhrase(Output, ARRAY_SIZE_INCR);
CreateOutputPhrase(outPhrase);
return outPhrase;
}
@ -219,7 +219,7 @@ void Hypothesis::CalcLMScore()
m_scoreBreakdown.ZeroAllLM(lmList);
Phrase outPhrase(Output); // = GetOutputPhrase();
Phrase outPhrase(Output, ARRAY_SIZE_INCR); // = GetOutputPhrase();
bool calcNow = false, firstPhrase = true;
for (size_t targetPhrasePos = 0; targetPhrasePos < GetCurrTargetPhrase().GetSize(); ++targetPhrasePos) {

View File

@ -190,7 +190,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Moses::Word &sourc
// modify the starting bitmap
}
Phrase* m_unksrc = new Phrase(Input);
Phrase* m_unksrc = new Phrase(Input, 1);
m_unksrc->AddWord() = sourceWord;
m_unksrcs.push_back(m_unksrc);

View File

@ -98,7 +98,7 @@ TrellisNode::~TrellisNode()
Moses::Phrase TrellisNode::GetOutputPhrase() const
{
// exactly like same fn in hypothesis, but use trellis nodes instead of prevHypos pointer
Moses::Phrase ret(Moses::Output);
Moses::Phrase ret(Moses::Output, Moses::ARRAY_SIZE_INCR);
const Moses::Phrase &currTargetPhrase = m_hypo->GetCurrTargetPhrase();
for (size_t pos = 0; pos < currTargetPhrase.GetSize(); ++pos) {

View File

@ -36,7 +36,7 @@ void extract_ngrams(const vector<Word >& sentence, map < Phrase, int > & allngr
{
for (int k = 0; k < (int)bleu_order; k++) {
for(int i =0; i < max((int)sentence.size()-k,0); i++) {
Phrase ngram(Output);
Phrase ngram(Output, k+1);
for ( int j = i; j<= i+k; j++) {
ngram.AddWord(sentence[j]);
}
@ -402,7 +402,7 @@ const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & inco
for (size_t start = 0; start < currPhrase.GetSize(); ++start) {
for (size_t end = start; end < start + bleu_order; ++end) {
if (end < currPhrase.GetSize()) {
Phrase edgeNgram(Output);
Phrase edgeNgram(Output, end-start+1);
for (size_t index = start; index <= end; ++index) {
edgeNgram.AddWord(currPhrase.GetWord(index));
}
@ -433,8 +433,8 @@ const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & inco
cerr << "edgeInNgram: " << edgeIncomingNgram << endl;
}
Phrase edgeSuffix(Output);
Phrase ngramSuffix(Output);
Phrase edgeSuffix(Output, ARRAY_SIZE_INCR);
Phrase ngramSuffix(Output, ARRAY_SIZE_INCR);
GetPhraseSuffix(edgeWords,back,edgeSuffix);
GetPhraseSuffix(edgeIncomingNgram,back,ngramSuffix);

View File

@ -182,7 +182,7 @@ int BilingualDynSuffixArray::LoadCorpus(InputFileStream& corpus, const FactorLis
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
while(getline(corpus, line)) {
sntArray.push_back(sntIdx);
Phrase phrase(direction);
Phrase phrase(direction, ARRAY_SIZE_INCR);
// parse phrase
phrase.CreateFromString( factors, line, factorDelimiter);
// store words in vocabulary and corpus
@ -438,7 +438,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
const unsigned oldSrcCrpSize = m_srcCorpus->size(), oldTrgCrpSize = m_trgCorpus->size();
cerr << "old source corpus size = " << oldSrcCrpSize << "\told target size = " << oldTrgCrpSize << endl;
Phrase sphrase(Input);
Phrase sphrase(Input, ARRAY_SIZE_INCR);
sphrase.CreateFromString(m_inputFactors, source, factorDelimiter);
m_srcVocab->MakeOpen();
// store words in vocabulary and corpus
@ -449,7 +449,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
}
m_srcSntBreaks.push_back(oldSrcCrpSize); // former end of corpus is index of new sentence
m_srcVocab->MakeClosed();
Phrase tphrase(Output);
Phrase tphrase(Output, ARRAY_SIZE_INCR);
tphrase.CreateFromString(m_outputFactors, target, factorDelimiter);
m_trgVocab->MakeOpen();
for(size_t i = 0; i < tphrase.GetSize(); ++i) {

View File

@ -69,7 +69,7 @@ LexicalReordering::~LexicalReordering()
Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
{
return m_table->GetScore(f, e, Phrase(Output));
return m_table->GetScore(f, e, Phrase(Output, ARRAY_SIZE_INCR));
}
FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,

View File

@ -571,8 +571,8 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f)
if(m_FactorsE.empty()) {
//f is all of key...
Candidates cands;
m_Table->GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
m_Table->GetCandidates(MakeTableKey(f,Phrase(Output, ARRAY_SIZE_INCR)),&cands);
m_Cache[MakeCacheKey(f,Phrase(Output, ARRAY_SIZE_INCR))] = cands;
} else {
ObjectPool<PPimp> pool;
PPimp* pPos = m_Table->GetRoot();

View File

@ -236,11 +236,11 @@ public:
std::vector<float> scores;
Phrase src;
State() : range(0,0),scores(0),src(Input) {}
State() : range(0,0),scores(0),src(Input, ARRAY_SIZE_INCR) {}
State(Position b,Position e,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
: ptr(v),range(b,e),scores(sv),src(Input) {}
: ptr(v),range(b,e),scores(sv),src(Input, ARRAY_SIZE_INCR) {}
State(Range const& r,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
: ptr(v),range(r),scores(sv),src(Input) {}
: ptr(v),range(r),scores(sv),src(Input, ARRAY_SIZE_INCR) {}
Position begin() const {
return range.first;

View File

@ -104,7 +104,7 @@ void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& fac
Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
{
Phrase retPhrase(m_direction);
Phrase retPhrase(m_direction, wordsRange.GetNumWordsCovered());
for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++) {
Word &word = retPhrase.AddWord();
@ -116,7 +116,6 @@ Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
{
Phrase retPhrase(m_direction);
stringstream strme;
for (size_t pos = 0 ; pos < GetSize() ; pos++) {
strme << GetWord(pos).GetString(factorsToPrint, (pos != GetSize()-1));
@ -189,6 +188,8 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
{
FactorCollection &factorCollection = FactorCollection::Instance();
m_words.reserve(phraseVector.size());
for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++) {
// add word this phrase
Word &word = AddWord();
@ -222,6 +223,8 @@ void Phrase::CreateFromStringNewFormat(FactorDirection direction
// to
// "KOMMA|none" "ART|Def.Z" "NN|Neut.NotGen.Sg" "VVFIN|none"
m_words.reserve(annotatedWordVector.size()-1);
for (size_t phrasePos = 0 ; phrasePos < annotatedWordVector.size() - 1 ; phrasePos++) {
string &annotatedWord = annotatedWordVector[phrasePos];
bool isNonTerminal;

View File

@ -58,7 +58,7 @@ public:
/** create empty phrase
* \param direction = language (Input = Source, Output = Target)
*/
Phrase(FactorDirection direction, size_t reserveSize = ARRAY_SIZE_INCR);
Phrase(FactorDirection direction, size_t reserveSize);
/** create phrase from vectors of words */
Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);

View File

@ -105,7 +105,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
}
// source
Phrase sourcePhrase(Input);
Phrase sourcePhrase(Input, 0);
sourcePhrase.CreateFromString( input, phraseVector);
//target
TargetPhrase targetPhrase(Output);

View File

@ -148,7 +148,7 @@ bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
Word sourceLHS, targetLHS;
// source
Phrase sourcePhrase(Input);
Phrase sourcePhrase(Input, 0);
sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);
// create target phrase obj

View File

@ -34,7 +34,7 @@ namespace Moses
{
Sentence::Sentence(FactorDirection direction)
: Phrase(direction)
: Phrase(direction, 0)
, InputType()
{
assert(direction == Input);

View File

@ -395,12 +395,12 @@ bool StaticData::LoadData(Parameter *parameter)
if (vecStr.size() == 1) {
sentenceID++;
Phrase phrase(Output);
Phrase phrase(Output, 0);
phrase.CreateFromString(GetOutputFactorOrder(), vecStr[0], GetFactorDelimiter());
m_constraints.insert(make_pair(sentenceID,phrase));
} else if (vecStr.size() == 2) {
sentenceID = Scan<long>(vecStr[0]);
Phrase phrase(Output);
Phrase phrase(Output, 0);
phrase.CreateFromString(GetOutputFactorOrder(), vecStr[1], GetFactorDelimiter());
m_constraints.insert(make_pair(sentenceID,phrase));
} else {

View File

@ -43,7 +43,7 @@ bool TargetPhrase::printalign=StaticData::Instance().PrintAlignmentInfo();
//bool TargetPhrase::printalign;
TargetPhrase::TargetPhrase(FactorDirection direction, std::string out_string)
:Phrase(direction),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
:Phrase(direction, 0),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
{
//ACAT
@ -55,7 +55,7 @@ TargetPhrase::TargetPhrase(FactorDirection direction, std::string out_string)
TargetPhrase::TargetPhrase(FactorDirection direction)
:Phrase(direction)
:Phrase(direction, ARRAY_SIZE_INCR)
, m_transScore(0.0)
, m_ngramScore(0.0)
, m_fullScore(0.0)

View File

@ -84,7 +84,7 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
TranslationOption::TranslationOption(const TranslationOption &copy)
: m_targetPhrase(copy.m_targetPhrase)
//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input, ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
, m_sourceWordsRange(copy.m_sourceWordsRange)
, m_futureScore(copy.m_futureScore)
, m_scoreBreakdown(copy.m_scoreBreakdown)
@ -94,7 +94,7 @@ TranslationOption::TranslationOption(const TranslationOption &copy)
TranslationOption::TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange)
: m_targetPhrase(copy.m_targetPhrase)
//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input, ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
, m_sourceWordsRange(sourceWordsRange)
, m_futureScore(copy.m_futureScore)
, m_scoreBreakdown(copy.m_scoreBreakdown)

View File

@ -220,7 +220,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
// modify the starting bitmap
}
Phrase* m_unksrc = new Phrase(Input);
Phrase* m_unksrc = new Phrase(Input, 1);
m_unksrc->AddWord() = sourceWord;
m_unksrcs.push_back(m_unksrc);

View File

@ -156,7 +156,7 @@ void TrellisPath::CreateDeviantPaths(TrellisPathList &pathColl) const
Phrase TrellisPath::GetTargetPhrase() const
{
Phrase targetPhrase(Output);
Phrase targetPhrase(Output, ARRAY_SIZE_INCR);
int numHypo = (int) m_path.size();
for (int node = numHypo - 2 ; node >= 0 ; --node) {
@ -174,7 +174,7 @@ Phrase TrellisPath::GetSurfacePhrase() const
{
const std::vector<FactorType> &outputFactor = StaticData::Instance().GetOutputFactorOrder();
Phrase targetPhrase = GetTargetPhrase()
,ret(Output);
,ret(Output, targetPhrase.GetSize());
for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) {
Word &newWord = ret.AddWord();