moving more stuff out of IOWrapper

This commit is contained in:
Hieu Hoang 2014-12-30 18:53:30 +05:30
parent 14cbf9bc22
commit ba166f109c
14 changed files with 74 additions and 270 deletions

View File

@ -494,7 +494,7 @@ public:
{
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
IOWrapper::OutputAllFeatureScores(path.GetScoreBreakdown(),buf);
path.GetScoreBreakdown().OutputAllFeatureScores(buf);
nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str());
}

View File

@ -9,54 +9,6 @@ using namespace std;
namespace Moses
{
/**
 * Write the scores of every tuneable feature function held in `features`
 * to `out`. Stateful feature functions are printed first, then stateless
 * ones; the stateful producer described as "BleuScoreFeature" is skipped.
 */
void BaseManager::OutputAllFeatureScores(const Moses::ScoreComponentCollection &features,
    std::ostream &out) const
{
  // label of the producer printed most recently; shared by both passes
  std::string previousLabel;

  typedef std::vector<const StatefulFeatureFunction*> StatefulList;
  const StatefulList &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
  for (StatefulList::const_iterator it = statefulFFs.begin(); it != statefulFFs.end(); ++it) {
    const StatefulFeatureFunction *ff = *it;
    if (ff->GetScoreProducerDescription() == "BleuScoreFeature") {
      continue;
    }
    if (!ff->IsTuneable()) {
      continue;
    }
    OutputFeatureScores(out, features, ff, previousLabel);
  }

  typedef std::vector<const StatelessFeatureFunction*> StatelessList;
  const StatelessList &statelessFFs = StatelessFeatureFunction::GetStatelessFeatureFunctions();
  for (StatelessList::const_iterator it = statelessFFs.begin(); it != statelessFFs.end(); ++it) {
    const StatelessFeatureFunction *ff = *it;
    if (!ff->IsTuneable()) {
      continue;
    }
    OutputFeatureScores(out, features, ff, previousLabel);
  }
}
void BaseManager::OutputFeatureScores( std::ostream& out,
const ScoreComponentCollection &features,
const FeatureFunction *ff,
std::string &lastName ) const
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
// regular features (not sparse)
if (ff->GetNumScoreComponents() != 0) {
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
lastName = ff->GetScoreProducerDescription();
out << " " << lastName << "=";
}
vector<float> scores = features.GetScoresForProducer( ff );
for (size_t j = 0; j<scores.size(); ++j) {
out << " " << scores[j];
}
}
// sparse features
const FVector scores = features.GetVectorForProducer( ff );
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
out << " " << i->first << "= " << i->second;
}
}
/***
* print surface factor only for the given phrase
*/

View File

@ -24,12 +24,6 @@ protected:
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
typedef std::set< std::pair<size_t, size_t> > Alignments;
void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features,
std::ostream &out) const;
void OutputFeatureScores( std::ostream& out,
const ScoreComponentCollection &features,
const FeatureFunction *ff,
std::string &lastName ) const;
void OutputSurface(std::ostream &out,
const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,

View File

@ -365,7 +365,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
out << " ||| ";
OutputAllFeatureScores(derivation.scoreBreakdown, out);
derivation.scoreBreakdown.OutputAllFeatureScores(out);
out << " ||| " << derivation.score;
// optionally, print word alignments

View File

@ -571,7 +571,7 @@ void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
IOWrapper::OutputAllFeatureScores(scoreBreakdown, out);
scoreBreakdown.OutputAllFeatureScores(out);
}
out << "| ";
}

View File

@ -258,26 +258,6 @@ GetInput(InputType* inputType)
}
}
std::map<size_t, const Factor*> IOWrapper::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor)
{
const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
const Phrase &inputPhrase = inputPath.GetPhrase();
std::map<size_t, const Factor*> ret;
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
if (factor) {
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
"Placeholder should be aligned to 1, and only 1, word");
ret[*targetPos.begin()] = factor;
}
}
return ret;
}
void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
{
if (hypo != NULL) {
@ -411,117 +391,6 @@ void IOWrapper::WriteApplicationContext(std::ostream &out,
}
}
/***
* print surface factor only for the given phrase
*/
void IOWrapper::OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
"Empty factor 0 at position " << pos);
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"Empty factor " << i << " at position " << pos);
out << "|" << *factor;
}
out << " ";
}
}
}
//////////////////////////////////////////////////////////////////////////
/***
* print surface factor only for the given phrase
*/
void IOWrapper::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors)
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
bool markUnknown = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
out << phrase;
} else {
FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
std::map<size_t, const Factor*> placeholders;
if (placeholderFactor != NOT_FOUND) {
// creates map of target position -> factor for placeholders
placeholders = GetPlaceholders(edge, placeholderFactor);
}
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
if (placeholders.size()) {
// do placeholders
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
if (iter != placeholders.end()) {
factor = iter->second;
}
}
UTIL_THROW_IF2(factor == NULL,
"No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << "UNK" << *factor;
} else {
out << *factor;
}
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"No factor " << i << " at position " << pos);
out << "|" << *factor;
}
out << " ";
}
}
// trace ("report segmentation") option "-t" / "-tt"
if (reportSegmentation > 0 && phrase.GetSize() > 0) {
const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
const int sourceStart = sourceRange.GetStartPos();
const int sourceEnd = sourceRange.GetEndPos();
out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
if (reportSegmentation == 2) {
out << ",wa=";
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
Hypothesis::OutputAlignment(out, ai, 0, 0);
out << ",total=";
out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
OutputAllFeatureScores(scoreBreakdown, out);
}
out << "| ";
}
}
void IOWrapper::OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
{
ostringstream out;
@ -551,17 +420,6 @@ void IOWrapper::OutputAlignment(OutputCollector* collector, size_t lineNo , cons
}
}
/**
 * Print the surface form of a trellis path to `out`, terminated by a
 * flushed newline. Edges are visited from the last element of the path's
 * edge list down to the first. The translation id is unused.
 */
void IOWrapper::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out)
{
  typedef std::vector<const Hypothesis *> EdgeList;
  const EdgeList &pathEdges = path.GetEdges();

  // walk the edge list back to front
  for (EdgeList::const_reverse_iterator it = pathEdges.rbegin(); it != pathEdges.rend(); ++it) {
    const Hypothesis &current = **it;
    OutputSurface(out, current, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
  }
  out << std::endl;
}
void IOWrapper::Backtrack(const Hypothesis *hypo)
{
@ -593,54 +451,5 @@ bool IOWrapper::ReadInput(InputTypeEnum inputType, InputType*& source)
return (source ? true : false);
}
/**
 * Write the scores of every tuneable feature function held in `features`
 * to `out`. Stateful feature functions are printed first, then stateless
 * ones; the stateful producer described as "BleuScoreFeature" is skipped.
 */
void IOWrapper::OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
                                       , std::ostream &out)
{
  // label of the producer printed most recently; shared by both passes
  std::string previousLabel;

  typedef std::vector<const StatefulFeatureFunction*> StatefulList;
  const StatefulList &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
  for (StatefulList::const_iterator it = statefulFFs.begin(); it != statefulFFs.end(); ++it) {
    const StatefulFeatureFunction *ff = *it;
    if (ff->GetScoreProducerDescription() == "BleuScoreFeature") {
      continue;
    }
    if (!ff->IsTuneable()) {
      continue;
    }
    OutputFeatureScores(out, features, ff, previousLabel);
  }

  typedef std::vector<const StatelessFeatureFunction*> StatelessList;
  const StatelessList &statelessFFs = StatelessFeatureFunction::GetStatelessFeatureFunctions();
  for (StatelessList::const_iterator it = statelessFFs.begin(); it != statelessFFs.end(); ++it) {
    const StatelessFeatureFunction *ff = *it;
    if (!ff->IsTuneable()) {
      continue;
    }
    OutputFeatureScores(out, features, ff, previousLabel);
  }
}
void IOWrapper::OutputFeatureScores( std::ostream& out
, const ScoreComponentCollection &features
, const FeatureFunction *ff
, std::string &lastName )
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
// regular features (not sparse)
if (ff->GetNumScoreComponents() != 0) {
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
lastName = ff->GetScoreProducerDescription();
out << " " << lastName << "=";
}
vector<float> scores = features.GetScoresForProducer( ff );
for (size_t j = 0; j<scores.size(); ++j) {
out << " " << scores[j];
}
}
// sparse features
const FVector scores = features.GetVectorForProducer( ff );
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
out << " " << i->first << "= " << i->second;
}
}
} // namespace

View File

@ -117,10 +117,6 @@ protected:
void WriteApplicationContext(std::ostream &out,
const ApplicationContext &context);
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors);
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors);
public:
IOWrapper();
~IOWrapper();
@ -170,8 +166,6 @@ public:
// CHART
// phrase-based
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,char reportSegmentation, bool reportAllFactors, std::ostream &out);
void OutputPassthroughInformation(std::string& passthrough, const Moses::Hypothesis* hypo);
void OutputPassthroughInformation(std::ostream& os, const Moses::Hypothesis* hypo);
@ -179,16 +173,6 @@ public:
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::TrellisPath &path);
void OutputAlignment(OutputCollector* collector, size_t lineNo , const std::vector<const Hypothesis *> &edges);
static void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
, std::ostream &out);
static void OutputFeatureScores( std::ostream& out
, const Moses::ScoreComponentCollection &features
, const Moses::FeatureFunction *ff
, std::string &lastName );
// creates a map of TARGET positions which should be replaced by word using placeholder
std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor);
// post editing
std::ifstream *spe_src, *spe_trg, *spe_aln;

View File

@ -329,7 +329,7 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
out << " ||| ";
OutputAllFeatureScores(features, out);
features.OutputAllFeatureScores(out);
out << " ||| " << i->GetScore() << '\n';
}
out << std::flush;

View File

@ -1497,7 +1497,7 @@ void Manager::OutputNBest(std::ostream& out
out << " |||";
// print scores with feature names
OutputAllFeatureScores(path.GetScoreBreakdown(), out );
path.GetScoreBreakdown().OutputAllFeatureScores(out );
// total
out << " ||| " << path.GetTotalScore();
@ -1617,7 +1617,7 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std
out << ",";
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
OutputAllFeatureScores(scoreBreakdown, out);
scoreBreakdown.OutputAllFeatureScores(out);
}
out << "| ";
}
@ -1864,4 +1864,15 @@ void Manager::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*trans
out << endl;
}
/**
 * Print the surface form of a trellis path to `out`, terminated by a
 * flushed newline. Edges are visited from the last element of the path's
 * edge list down to the first. The translation id is unused.
 */
void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out) const
{
  typedef std::vector<const Hypothesis *> EdgeList;
  const EdgeList &pathEdges = path.GetEdges();

  // walk the edge list back to front
  for (EdgeList::const_reverse_iterator it = pathEdges.rbegin(); it != pathEdges.rend(); ++it) {
    const Hypothesis &current = **it;
    OutputSurface(out, current, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
  }
  out << std::endl;
}
} // namespace

View File

@ -165,6 +165,7 @@ public:
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId) const;
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
char reportSegmentation, bool reportAllFactors, std::ostream& out) const;
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,char reportSegmentation, bool reportAllFactors, std::ostream &out) const;
#ifdef HAVE_PROTOBUF
void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;

View File

@ -3,6 +3,8 @@
#include "util/exception.hh"
#include "ScoreComponentCollection.h"
#include "StaticData.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/StatefulFeatureFunction.h"
using namespace std;
@ -301,6 +303,52 @@ void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const Score
}
}
void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const
{
std::string lastName = "";
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ ) {
const StatefulFeatureFunction *ff = sff[i];
if (ff->GetScoreProducerDescription() != "BleuScoreFeature"
&& ff->IsTuneable()) {
OutputFeatureScores( out, ff, lastName );
}
}
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ ) {
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
OutputFeatureScores( out, ff, lastName );
}
}
}
void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
, const FeatureFunction *ff
, std::string &lastName ) const
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
// regular features (not sparse)
if (ff->GetNumScoreComponents() != 0) {
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
lastName = ff->GetScoreProducerDescription();
out << " " << lastName << "=";
}
vector<float> scores = GetScoresForProducer( ff );
for (size_t j = 0; j<scores.size(); ++j) {
out << " " << scores[j];
}
}
// sparse features
const FVector scores = GetVectorForProducer( ff );
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
out << " " << i->first << "= " << i->second;
}
}
}

View File

@ -429,6 +429,11 @@ public:
m_scores.merge(other.m_scores);
}
void OutputAllFeatureScores(std::ostream &out) const;
void OutputFeatureScores( std::ostream& out
, const Moses::FeatureFunction *ff
, std::string &lastName ) const;
#ifdef MPI_ENABLE
public:
friend class boost::serialization::access;

View File

@ -82,7 +82,7 @@ void Manager::OutputNBestList(OutputCollector *collector,
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
out << " ||| ";
OutputAllFeatureScores(derivation.scoreBreakdown, out);
derivation.scoreBreakdown.OutputAllFeatureScores(out);
out << " ||| " << derivation.score;
// optionally, print word alignments

View File

@ -190,7 +190,7 @@ void TranslationTask::RunPb()
// consensus decoding
else if (staticData.UseConsensusDecoding()) {
const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
m_ioWrapper.OutputBestHypo(conBestHypo, m_source->GetTranslationId(),
manager.OutputBestHypo(conBestHypo, m_source->GetTranslationId(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),out);
m_ioWrapper.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector(), m_source->GetTranslationId(), conBestHypo);
@ -202,7 +202,7 @@ void TranslationTask::RunPb()
// n-best MBR decoding
else {
const TrellisPath &mbrBestHypo = doMBR(nBestList);
m_ioWrapper.OutputBestHypo(mbrBestHypo, m_source->GetTranslationId(),
manager.OutputBestHypo(mbrBestHypo, m_source->GetTranslationId(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors(),out);
m_ioWrapper.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector(), m_source->GetTranslationId(), mbrBestHypo);