Add extended search graph output format (switch -output-search-graph-extended / -osgx)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2717 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
phkoehn 2010-01-28 15:32:04 +00:00
parent 558ff92c33
commit cccab3d22b
8 changed files with 97 additions and 39 deletions

View File

@ -144,7 +144,11 @@ void IOWrapper::Initialization(const std::vector<FactorType> &inputFactorOrder
// search graph output
if (staticData.GetOutputSearchGraph())
{
string fileName = staticData.GetParam("output-search-graph")[0];
string fileName;
if (staticData.GetOutputSearchGraphExtended())
fileName = staticData.GetParam("output-search-graph-extended")[0];
else
fileName = staticData.GetParam("output-search-graph")[0];
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
file->open(fileName.c_str());

View File

@ -106,8 +106,8 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
os << "\tdropped=" << *dwi << std::endl;
}
}
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
StaticData::Instance().GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
StaticData::Instance().GetScoreIndexManager().PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
os << std::endl;
}

View File

@ -323,33 +323,59 @@ void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStre
// Writes the search-graph record for one hypothesis to outputSearchGraphStream,
// prefixed by translationId.
//
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of a diff without +/- markers (two statements open the
// body, and several `if` headers are directly followed by unrelated
// statements), so it does not compile as shown. Confirm every statement
// against the real source file before changing any code here.
//
// What the visible statements establish:
//  - hypo with id 0 (initial hypothesis): " hyp=0 stack=0" is written, plus
//    " forward=" / " fscore=" when the extended flag is off, then the function
//    returns.
//  - extended flag off: hyp, stack, back, score, transition, an optional
//    recombined=<id of recombinationHypo>, forward, fscore, covered=<start>-<end>
//    and out=<target phrase string> are written on one line, then the function
//    returns.
//  - extended flag on: hyp (the id of recombinationHypo when it is non-NULL,
//    otherwise the id of hypo), back, a bracketed list of labeled scores, and
//    out=<target phrase string>.
//
// The extended flag is read from StaticData::Instance().GetOutputSearchGraphExtended().
void OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const Hypothesis *hypo, const Hypothesis *recombinationHypo, int forward, double fscore)
{
outputSearchGraphStream << translationId
<< " hyp=" << hypo->GetId()
<< " stack=" << hypo->GetWordsBitmap().GetNumWordsCovered();
if (hypo->GetId() > 0)
const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended();
outputSearchGraphStream << translationId;
// special case: initial hypothesis
if ( hypo->GetId() == 0 )
{
const Hypothesis *prevHypo = hypo->GetPrevHypo();
outputSearchGraphStream << " back=" << prevHypo->GetId()
<< " score=" << hypo->GetScore()
<< " transition=" << (hypo->GetScore() - prevHypo->GetScore());
outputSearchGraphStream << " hyp=0 stack=0";
if (!extendedFormat)
{
outputSearchGraphStream << " forward=" << forward << " fscore=" << fscore;
}
outputSearchGraphStream << endl;
return;
}
if (recombinationHypo != NULL)
const Hypothesis *prevHypo = hypo->GetPrevHypo();
// output in traditional format
if (!extendedFormat)
{
outputSearchGraphStream << " recombined=" << recombinationHypo->GetId();
outputSearchGraphStream << " hyp=" << hypo->GetId()
<< " stack=" << hypo->GetWordsBitmap().GetNumWordsCovered()
<< " back=" << prevHypo->GetId()
<< " score=" << hypo->GetScore()
<< " transition=" << (hypo->GetScore() - prevHypo->GetScore());
if (recombinationHypo != NULL)
outputSearchGraphStream << " recombined=" << recombinationHypo->GetId();
outputSearchGraphStream << " forward=" << forward << " fscore=" << fscore
<< " covered=" << hypo->GetCurrSourceWordsRange().GetStartPos()
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos()
<< " out=" << hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder)
<< endl;
return;
}
// output in extended format
// a hypothesis with a recombinationHypo is reported under that hypothesis' id
if (recombinationHypo != NULL)
outputSearchGraphStream << " hyp=" << recombinationHypo->GetId();
else
outputSearchGraphStream << " hyp=" << hypo->GetId();
outputSearchGraphStream << " forward=" << forward
<< " fscore=" << fscore;
outputSearchGraphStream << " back=" << prevHypo->GetId();
if (hypo->GetId() > 0)
{
outputSearchGraphStream << " covered=" << hypo->GetCurrSourceWordsRange().GetStartPos()
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos()
<< " out=" << hypo->GetCurrTargetPhrase();
}
// Work on a copy of this hypothesis' score breakdown and apply MinusEquals with
// the predecessor's breakdown (presumably leaving the per-component difference
// between the two - confirm in ScoreComponentCollection).
ScoreComponentCollection scoreBreakdown = hypo->GetScoreBreakdown();
scoreBreakdown.MinusEquals( prevHypo->GetScoreBreakdown() );
outputSearchGraphStream << " [ ";
StaticData::Instance().GetScoreIndexManager().PrintLabeledScores( outputSearchGraphStream, scoreBreakdown );
outputSearchGraphStream << " ]";
outputSearchGraphStream << endl;
outputSearchGraphStream << " out=" << hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << endl;
}
void Manager::GetConnectedGraph(

View File

@ -97,6 +97,7 @@ Parameter::Parameter()
AddParam("output-word-graph", "owg", "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
AddParam("time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
#ifdef HAVE_PROTOBUF
AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
#endif

View File

@ -39,21 +39,34 @@ void ScoreIndexManager::AddScoreProducer(const ScoreProducer* sp)
*/
}
// Prints the given score collection with every weight set to 1.0f by building
// a weight vector of the same length as the collection's m_scores and
// delegating to the weighted printer.
//
// NOTE(review): two signatures and two bodies are visible (Debug_PrintLabeledScores
// taking `scc`, and PrintLabeledScores taking `scores`). This looks like the
// old and new versions of a rename shown together without +/- markers - confirm
// against the real source file before editing.
void ScoreIndexManager::Debug_PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scc) const
void ScoreIndexManager::PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scores) const
{
std::vector<float> weights(scc.m_scores.size(), 1.0f);
Debug_PrintLabeledWeightedScores(os, scc, weights);
std::vector<float> weights(scores.m_scores.size(), 1.0f);
PrintLabeledWeightedScores(os, scores, weights);
}
// Writes one value per entry of m_featureShortNames to os, on a single line:
// weights[i] * scores[i], with a single space between consecutive values.
// A label "<short name>: " is written in front of a value only when the short
// name differs from the one at the previous index, so consecutive components
// that share a short name appear under one label. No newline is written.
// Asserts that weights has exactly one entry per short name.
//
// NOTE(review): the Debug_ signature and the loop over m_featureNames (which
// prints name, a tab and the weight, one per line) appear to be the
// pre-change version shown alongside the new one without +/- markers -
// confirm against the real source file before editing.
void ScoreIndexManager::Debug_PrintLabeledWeightedScores(std::ostream& os, const ScoreComponentCollection& scc, const std::vector<float>& weights) const
void ScoreIndexManager::PrintLabeledWeightedScores(std::ostream& os, const ScoreComponentCollection& scores, const std::vector<float>& weights) const
{
assert(m_featureNames.size() == weights.size());
for (size_t i = 0; i < m_featureNames.size(); ++i)
os << m_featureNames[i] << "\t" << weights[i] << endl;
assert(m_featureShortNames.size() == weights.size());
// short name of the previous index; empty so that the first entry always gets a label
// (assuming no producer reports an empty short name)
string lastName = "";
for (size_t i = 0; i < m_featureShortNames.size(); ++i)
{
// separator between values, not before the first one
if (i>0)
{
os << " ";
}
// start a new label only when the short name changes
if (lastName != m_featureShortNames[i])
{
os << m_featureShortNames[i] << ": ";
lastName = m_featureShortNames[i];
}
os << weights[i] * scores[i];
}
}
void ScoreIndexManager::InitFeatureNames() {
m_featureNames.clear();
m_featureShortNames.clear();
size_t cur_i = 0;
size_t cur_scoreType = 0;
while (cur_i < m_last) {
@ -77,6 +90,7 @@ void ScoreIndexManager::InitFeatureNames() {
if (add_idx)
os << '_' << ind;
m_featureNames.push_back(os.str());
m_featureShortNames.push_back( m_producers[cur_scoreType]->GetScoreProducerWeightShortName() );
++cur_i;
++ind;
}

View File

@ -40,10 +40,10 @@ public:
size_t GetEndIndex(size_t scoreBookkeepingID) const { return m_ends[scoreBookkeepingID]; }
//! sum of all score components from every score producer
size_t GetTotalNumberOfScores() const { return m_last; }
//! ??? print unweighted scores of each ScoreManager to stream os
void Debug_PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scc) const;
//! ??? print weighted scores of each ScoreManager to stream os
void Debug_PrintLabeledWeightedScores(std::ostream& os, const ScoreComponentCollection& scc, const std::vector<float>& weights) const;
//! print the unweighted score components in scc to stream os, labeled by feature short name (every weight taken as 1.0)
void PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scc) const;
//! print each score component in scc multiplied by its entry in weights to stream os, labeled by feature short name; weights needs one entry per feature
void PrintLabeledWeightedScores(std::ostream& os, const ScoreComponentCollection& scc, const std::vector<float>& weights) const;
#ifdef HAVE_PROTOBUF
void SerializeFeatureNamesToPB(hgmert::Hypergraph* hg) const;
#endif
@ -60,6 +60,7 @@ private:
std::vector<const StatefulFeatureFunction*> m_stateful; /**< all the score producers in this run */
std::vector<const StatelessFeatureFunction*> m_stateless; /**< all the score producers in this run */
std::vector<std::string> m_featureNames;
std::vector<std::string> m_featureShortNames;
size_t m_last;
};

View File

@ -172,7 +172,17 @@ bool StaticData::LoadData(Parameter *parameter)
}
m_outputSearchGraph = true;
}
else
// ... in extended format
else if (m_parameter->GetParam("output-search-graph-extended").size() > 0)
{
if (m_parameter->GetParam("output-search-graph-extended").size() != 1) {
UserMessage::Add(string("ERROR: wrong format for switch -output-search-graph-extended file"));
return false;
}
m_outputSearchGraph = true;
m_outputSearchGraphExtended = true;
}
else
m_outputSearchGraph = false;
#ifdef HAVE_PROTOBUF
if (m_parameter->GetParam("output-search-graph-pb").size() > 0)
@ -255,15 +265,15 @@ bool StaticData::LoadData(Parameter *parameter)
m_weightWordPenalty = Scan<float>( m_parameter->GetParam("weight-w")[0] );
m_weightUnknownWord = (m_parameter->GetParam("weight-u").size() > 0) ? Scan<float>(m_parameter->GetParam("weight-u")[0]) : 1;
m_distortionScoreProducer = new DistortionScoreProducer(m_scoreIndexManager);
m_allWeights.push_back(m_weightDistortion);
m_wpProducer = new WordPenaltyProducer(m_scoreIndexManager);
m_allWeights.push_back(m_weightWordPenalty);
m_unknownWordPenaltyProducer = new UnknownWordPenaltyProducer(m_scoreIndexManager);
m_allWeights.push_back(m_weightUnknownWord);
m_distortionScoreProducer = new DistortionScoreProducer(m_scoreIndexManager);
m_allWeights.push_back(m_weightDistortion);
// reordering constraints
m_maxDistortion = (m_parameter->GetParam("distortion-limit").size() > 0) ?
Scan<int>(m_parameter->GetParam("distortion-limit")[0])

View File

@ -159,7 +159,8 @@ protected:
//! constructor. only the 1 static variable can be created
bool m_outputWordGraph; //! whether to output word graph
bool m_outputSearchGraph; //! whether to output search graph
bool m_outputSearchGraph; //! whether to output search graph
bool m_outputSearchGraphExtended; //! ... in extended format
#ifdef HAVE_PROTOBUF
bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
#endif
@ -457,7 +458,8 @@ public:
bool UseTimeout() const { return m_timeout; }
size_t GetTimeoutThreshold() const { return m_timeout_threshold; }
size_t GetOutputSearchGraph() const { return m_outputSearchGraph; }
bool GetOutputSearchGraph() const { return m_outputSearchGraph; }
bool GetOutputSearchGraphExtended() const { return m_outputSearchGraphExtended; }
#ifdef HAVE_PROTOBUF
bool GetOutputSearchGraphPB() const { return m_outputSearchGraphPB; }
#endif