Memory efficiency: make the reserveSize argument non-optional in
Moses::Phrase's constructor.

The default used to be ARRAY_SIZE_INCR = 10, which will be excessive in
many cases.  Where the default was used, I've set the exact size where
that was obvious and explicitly used ARRAY_SIZE_INCR otherwise.  If you
know the code involved, it's probably worth reviewing.



git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3908 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
pjwilliams 2011-02-28 11:41:08 +00:00
parent a5a860ad1f
commit 75709a6c87
20 changed files with 37 additions and 34 deletions

View File

@ -86,7 +86,7 @@ int main(int argc, char** argv)
if(use_context) {
c_mask.push_back(0);
}
Phrase e(Output),f(Input),c(Output);
Phrase e(Output, 0),f(Input, 0),c(Output, 0);
e.CreateFromString(e_mask, query_e, "|");
f.CreateFromString(f_mask, query_f, "|");
c.CreateFromString(c_mask, query_c,"|");

View File

@ -269,7 +269,7 @@ void IOWrapper::OutputBestHypo(const MosesChart::Hypothesis *hypo, long translat
if (StaticData::Instance().IsPathRecoveryEnabled()) {
out << "||| ";
}
Phrase outPhrase(Output);
Phrase outPhrase(Output, ARRAY_SIZE_INCR);
hypo->CreateOutputPhrase(outPhrase);
// delete 1st & last

View File

@ -107,7 +107,7 @@ void Hypothesis::CreateOutputPhrase(Phrase &outPhrase) const
Phrase Hypothesis::GetOutputPhrase() const
{
Phrase outPhrase(Output);
Phrase outPhrase(Output, ARRAY_SIZE_INCR);
CreateOutputPhrase(outPhrase);
return outPhrase;
}
@ -219,7 +219,7 @@ void Hypothesis::CalcLMScore()
m_scoreBreakdown.ZeroAllLM(lmList);
Phrase outPhrase(Output); // = GetOutputPhrase();
Phrase outPhrase(Output, ARRAY_SIZE_INCR); // = GetOutputPhrase();
bool calcNow = false, firstPhrase = true;
for (size_t targetPhrasePos = 0; targetPhrasePos < GetCurrTargetPhrase().GetSize(); ++targetPhrasePos) {

View File

@ -190,7 +190,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Moses::Word &sourc
// modify the starting bitmap
}
Phrase* m_unksrc = new Phrase(Input);
Phrase* m_unksrc = new Phrase(Input, 1);
m_unksrc->AddWord() = sourceWord;
m_unksrcs.push_back(m_unksrc);

View File

@ -98,7 +98,7 @@ TrellisNode::~TrellisNode()
Moses::Phrase TrellisNode::GetOutputPhrase() const
{
// exactly like same fn in hypothesis, but use trellis nodes instead of prevHypos pointer
Moses::Phrase ret(Moses::Output);
Moses::Phrase ret(Moses::Output, Moses::ARRAY_SIZE_INCR);
const Moses::Phrase &currTargetPhrase = m_hypo->GetCurrTargetPhrase();
for (size_t pos = 0; pos < currTargetPhrase.GetSize(); ++pos) {

View File

@ -36,7 +36,7 @@ void extract_ngrams(const vector<Word >& sentence, map < Phrase, int > & allngr
{
for (int k = 0; k < (int)bleu_order; k++) {
for(int i =0; i < max((int)sentence.size()-k,0); i++) {
Phrase ngram(Output);
Phrase ngram(Output, k+1);
for ( int j = i; j<= i+k; j++) {
ngram.AddWord(sentence[j]);
}
@ -402,7 +402,7 @@ const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & inco
for (size_t start = 0; start < currPhrase.GetSize(); ++start) {
for (size_t end = start; end < start + bleu_order; ++end) {
if (end < currPhrase.GetSize()) {
Phrase edgeNgram(Output);
Phrase edgeNgram(Output, end-start+1);
for (size_t index = start; index <= end; ++index) {
edgeNgram.AddWord(currPhrase.GetWord(index));
}
@ -433,8 +433,8 @@ const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & inco
cerr << "edgeInNgram: " << edgeIncomingNgram << endl;
}
Phrase edgeSuffix(Output);
Phrase ngramSuffix(Output);
Phrase edgeSuffix(Output, ARRAY_SIZE_INCR);
Phrase ngramSuffix(Output, ARRAY_SIZE_INCR);
GetPhraseSuffix(edgeWords,back,edgeSuffix);
GetPhraseSuffix(edgeIncomingNgram,back,ngramSuffix);

View File

@ -182,7 +182,7 @@ int BilingualDynSuffixArray::LoadCorpus(InputFileStream& corpus, const FactorLis
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
while(getline(corpus, line)) {
sntArray.push_back(sntIdx);
Phrase phrase(direction);
Phrase phrase(direction, ARRAY_SIZE_INCR);
// parse phrase
phrase.CreateFromString( factors, line, factorDelimiter);
// store words in vocabulary and corpus
@ -438,7 +438,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
const unsigned oldSrcCrpSize = m_srcCorpus->size(), oldTrgCrpSize = m_trgCorpus->size();
cerr << "old source corpus size = " << oldSrcCrpSize << "\told target size = " << oldTrgCrpSize << endl;
Phrase sphrase(Input);
Phrase sphrase(Input, ARRAY_SIZE_INCR);
sphrase.CreateFromString(m_inputFactors, source, factorDelimiter);
m_srcVocab->MakeOpen();
// store words in vocabulary and corpus
@ -449,7 +449,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
}
m_srcSntBreaks.push_back(oldSrcCrpSize); // former end of corpus is index of new sentence
m_srcVocab->MakeClosed();
Phrase tphrase(Output);
Phrase tphrase(Output, ARRAY_SIZE_INCR);
tphrase.CreateFromString(m_outputFactors, target, factorDelimiter);
m_trgVocab->MakeOpen();
for(size_t i = 0; i < tphrase.GetSize(); ++i) {

View File

@ -69,7 +69,7 @@ LexicalReordering::~LexicalReordering()
Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
{
return m_table->GetScore(f, e, Phrase(Output));
return m_table->GetScore(f, e, Phrase(Output, ARRAY_SIZE_INCR));
}
FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,

View File

@ -571,8 +571,8 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f)
if(m_FactorsE.empty()) {
//f is all of key...
Candidates cands;
m_Table->GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
m_Table->GetCandidates(MakeTableKey(f,Phrase(Output, ARRAY_SIZE_INCR)),&cands);
m_Cache[MakeCacheKey(f,Phrase(Output, ARRAY_SIZE_INCR))] = cands;
} else {
ObjectPool<PPimp> pool;
PPimp* pPos = m_Table->GetRoot();

View File

@ -236,11 +236,11 @@ public:
std::vector<float> scores;
Phrase src;
State() : range(0,0),scores(0),src(Input) {}
State() : range(0,0),scores(0),src(Input, ARRAY_SIZE_INCR) {}
State(Position b,Position e,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
: ptr(v),range(b,e),scores(sv),src(Input) {}
: ptr(v),range(b,e),scores(sv),src(Input, ARRAY_SIZE_INCR) {}
State(Range const& r,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
: ptr(v),range(r),scores(sv),src(Input) {}
: ptr(v),range(r),scores(sv),src(Input, ARRAY_SIZE_INCR) {}
Position begin() const {
return range.first;

View File

@ -104,7 +104,7 @@ void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& fac
Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
{
Phrase retPhrase(m_direction);
Phrase retPhrase(m_direction, wordsRange.GetNumWordsCovered());
for (size_t currPos = wordsRange.GetStartPos() ; currPos <= wordsRange.GetEndPos() ; currPos++) {
Word &word = retPhrase.AddWord();
@ -116,7 +116,6 @@ Phrase Phrase::GetSubString(const WordsRange &wordsRange) const
std::string Phrase::GetStringRep(const vector<FactorType> factorsToPrint) const
{
Phrase retPhrase(m_direction);
stringstream strme;
for (size_t pos = 0 ; pos < GetSize() ; pos++) {
strme << GetWord(pos).GetString(factorsToPrint, (pos != GetSize()-1));
@ -189,6 +188,8 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
{
FactorCollection &factorCollection = FactorCollection::Instance();
m_words.reserve(phraseVector.size());
for (size_t phrasePos = 0 ; phrasePos < phraseVector.size() ; phrasePos++) {
// add word this phrase
Word &word = AddWord();
@ -222,6 +223,8 @@ void Phrase::CreateFromStringNewFormat(FactorDirection direction
// to
// "KOMMA|none" "ART|Def.Z" "NN|Neut.NotGen.Sg" "VVFIN|none"
m_words.reserve(annotatedWordVector.size()-1);
for (size_t phrasePos = 0 ; phrasePos < annotatedWordVector.size() - 1 ; phrasePos++) {
string &annotatedWord = annotatedWordVector[phrasePos];
bool isNonTerminal;

View File

@ -58,7 +58,7 @@ public:
/** create empty phrase
* \param direction = language (Input = Source, Output = Target)
*/
Phrase(FactorDirection direction, size_t reserveSize = ARRAY_SIZE_INCR);
Phrase(FactorDirection direction, size_t reserveSize);
/** create phrase from vectors of words */
Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);

View File

@ -105,7 +105,7 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
}
// source
Phrase sourcePhrase(Input);
Phrase sourcePhrase(Input, 0);
sourcePhrase.CreateFromString( input, phraseVector);
//target
TargetPhrase targetPhrase(Output);

View File

@ -148,7 +148,7 @@ bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
Word sourceLHS, targetLHS;
// source
Phrase sourcePhrase(Input);
Phrase sourcePhrase(Input, 0);
sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);
// create target phrase obj

View File

@ -34,7 +34,7 @@ namespace Moses
{
Sentence::Sentence(FactorDirection direction)
: Phrase(direction)
: Phrase(direction, 0)
, InputType()
{
assert(direction == Input);

View File

@ -395,12 +395,12 @@ bool StaticData::LoadData(Parameter *parameter)
if (vecStr.size() == 1) {
sentenceID++;
Phrase phrase(Output);
Phrase phrase(Output, 0);
phrase.CreateFromString(GetOutputFactorOrder(), vecStr[0], GetFactorDelimiter());
m_constraints.insert(make_pair(sentenceID,phrase));
} else if (vecStr.size() == 2) {
sentenceID = Scan<long>(vecStr[0]);
Phrase phrase(Output);
Phrase phrase(Output, 0);
phrase.CreateFromString(GetOutputFactorOrder(), vecStr[1], GetFactorDelimiter());
m_constraints.insert(make_pair(sentenceID,phrase));
} else {

View File

@ -43,7 +43,7 @@ bool TargetPhrase::printalign=StaticData::Instance().PrintAlignmentInfo();
//bool TargetPhrase::printalign;
TargetPhrase::TargetPhrase(FactorDirection direction, std::string out_string)
:Phrase(direction),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
:Phrase(direction, 0),m_transScore(0.0), m_ngramScore(0.0), m_fullScore(0.0), m_sourcePhrase(0)
{
//ACAT
@ -55,7 +55,7 @@ TargetPhrase::TargetPhrase(FactorDirection direction, std::string out_string)
TargetPhrase::TargetPhrase(FactorDirection direction)
:Phrase(direction)
:Phrase(direction, ARRAY_SIZE_INCR)
, m_transScore(0.0)
, m_ngramScore(0.0)
, m_fullScore(0.0)

View File

@ -84,7 +84,7 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
TranslationOption::TranslationOption(const TranslationOption &copy)
: m_targetPhrase(copy.m_targetPhrase)
//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input, ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
, m_sourceWordsRange(copy.m_sourceWordsRange)
, m_futureScore(copy.m_futureScore)
, m_scoreBreakdown(copy.m_scoreBreakdown)
@ -94,7 +94,7 @@ TranslationOption::TranslationOption(const TranslationOption &copy)
TranslationOption::TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange)
: m_targetPhrase(copy.m_targetPhrase)
//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input) : new Phrase(*copy.m_sourcePhrase))
, m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(Input, ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
, m_sourceWordsRange(sourceWordsRange)
, m_futureScore(copy.m_futureScore)
, m_scoreBreakdown(copy.m_scoreBreakdown)

View File

@ -220,7 +220,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
// modify the starting bitmap
}
Phrase* m_unksrc = new Phrase(Input);
Phrase* m_unksrc = new Phrase(Input, 1);
m_unksrc->AddWord() = sourceWord;
m_unksrcs.push_back(m_unksrc);

View File

@ -156,7 +156,7 @@ void TrellisPath::CreateDeviantPaths(TrellisPathList &pathColl) const
Phrase TrellisPath::GetTargetPhrase() const
{
Phrase targetPhrase(Output);
Phrase targetPhrase(Output, ARRAY_SIZE_INCR);
int numHypo = (int) m_path.size();
for (int node = numHypo - 2 ; node >= 0 ; --node) {
@ -174,7 +174,7 @@ Phrase TrellisPath::GetSurfacePhrase() const
{
const std::vector<FactorType> &outputFactor = StaticData::Instance().GetOutputFactorOrder();
Phrase targetPhrase = GetTargetPhrase()
,ret(Output);
,ret(Output, targetPhrase.GetSize());
for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) {
Word &newWord = ret.AddWord();