Bug fix for distinct n-best list generation; resolves a memory blowup issue.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1542 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
phkoehn 2008-01-18 19:00:07 +00:00
parent 1044b16522
commit 0faf1a83c0
3 changed files with 18 additions and 14 deletions

View File

@ -138,7 +138,6 @@ int main(int argc, char* argv[])
manager.ProcessSentence();
// pick best translation (maximum a posteriori decoding)
cerr << "using MBR ? " << ( staticData.UseMBR() ? "yes" : "no" ) << endl;
if (! staticData.UseMBR()) {
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
staticData.GetReportSegmentation(),
@ -172,7 +171,7 @@ int main(int argc, char* argv[])
{
TrellisPathList nBestList;
manager.CalcNBest(nBestSize, nBestList,true);
cerr << "size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl;
VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
IFVERBOSE(2) { PrintUserTime("calculated n-best list for MBR decoding"); }
std::vector<const Factor*> mbrBestHypo = doMBR(nBestList);
ioStream->OutputBestHypo(mbrBestHypo, source->GetTranslationId(),

View File

@ -108,7 +108,7 @@ void Manager::ProcessSentence()
// the stack is pruned before processing (lazy pruning):
VERBOSE(3,"processing hypothesis from next stack");
// VERBOSE("processing next stack at ");
// VERBOSE("processing next stack at ");
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
@ -389,8 +389,12 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co
contenders.Add(new TrellisPath(*iterBestHypo));
}
// factor defines stopping point for distinct n-best list if too many candidates identical
const size_t nBestFactor = StaticData::Instance().GetNBestFactor();
if (nBestFactor < 1) nBestFactor = 1000; // 0 = unlimited
// MAIN loop
for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * 20) ; iteration++)
for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * nBestFactor) ; iteration++)
{
// get next best from list of contenders
TrellisPath *path = contenders.pop();

View File

@ -121,13 +121,7 @@ bool StaticData::LoadData(Parameter *parameter)
m_nBestFilePath = m_parameter->GetParam("n-best-list")[0];
m_nBestSize = Scan<size_t>( m_parameter->GetParam("n-best-list")[1] );
m_onlyDistinctNBest=(m_parameter->GetParam("n-best-list").size()>2 && m_parameter->GetParam("n-best-list")[2]=="distinct");
if (m_parameter->GetParam("n-best-factor").size() > 0)
{
m_nBestFactor = Scan<size_t>( m_parameter->GetParam("n-best-factor")[0]);
}
}
}
else if (m_parameter->GetParam("n-best-list").size() == 1) {
UserMessage::Add(string("ERROR: wrong format for switch -n-best-list file size"));
return false;
@ -136,6 +130,13 @@ bool StaticData::LoadData(Parameter *parameter)
{
m_nBestSize = 0;
}
if (m_parameter->GetParam("n-best-factor").size() > 0)
{
m_nBestFactor = Scan<size_t>( m_parameter->GetParam("n-best-factor")[0]);
}
else {
m_nBestFactor = 20;
}
// include feature names in the n-best list
SetBooleanParameter( &m_labeledNBestList, "labeled-n-best-list", true );
@ -327,8 +328,8 @@ bool StaticData::LoadLexicalReorderingModel()
const vector<string> weightsStr = m_parameter.GetParam("weight-d");
*/
std::vector<float> weights;
int w = 1; //cur weight
int f = 0; //cur file
size_t w = 1; //cur weight
size_t f = 0; //cur file
//get weights values
std::cerr << "have " << fileStr.size() << " models\n";
for(size_t j = 0; j < weightsStr.size(); ++j){
@ -354,7 +355,7 @@ bool StaticData::LoadLexicalReorderingModel()
vector<FactorType> input,output;
LexicalReordering::Direction direction;
LexicalReordering::Condition condition;
int numWeights;
size_t numWeights;
//decode factor map
vector<string> inputfactors = Tokenize(spec[0],"-");
if(inputfactors.size() == 2){