Ales Tamchyna's printing of alignments (-print-alignment-info did nothing)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3867 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bojar 2011-02-03 09:08:42 +00:00
parent 72945c543e
commit 0bc0ece594
6 changed files with 76 additions and 11 deletions

View File

@ -33,6 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
// example file on how to use moses library
#include <iostream>
#include <stack>
#include "TypeDef.h"
#include "Util.h"
#include "IOWrapper.h"
@ -61,6 +62,7 @@ IOWrapper::IOWrapper(
,m_outputWordGraphStream(NULL)
,m_outputSearchGraphStream(NULL)
,m_detailedTranslationReportingStream(NULL)
,m_alignmentOutputStream(NULL)
{
Initialization(inputFactorOrder, outputFactorOrder
, inputFactorUsed
@ -82,6 +84,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
,m_outputWordGraphStream(NULL)
,m_outputSearchGraphStream(NULL)
,m_detailedTranslationReportingStream(NULL)
,m_alignmentOutputStream(NULL)
{
Initialization(inputFactorOrder, outputFactorOrder
, inputFactorUsed
@ -107,6 +110,7 @@ IOWrapper::~IOWrapper()
delete m_outputSearchGraphStream;
}
delete m_detailedTranslationReportingStream;
delete m_alignmentOutputStream;
}
void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder*/
@ -144,7 +148,8 @@ void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder
file->open(fileName.c_str());
}
// search graph output
// search graph output
if (staticData.GetOutputSearchGraph())
{
string fileName;
@ -164,6 +169,14 @@ void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder
m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
assert(m_detailedTranslationReportingStream->good());
}
// sentence alignment output: when an alignment output file name is configured
// in staticData, open that file for writing and keep the stream in
// m_alignmentOutputStream.
// NOTE(review): the open is verified only through assert(), which is compiled
// out when NDEBUG is defined, so a failed open would go unnoticed in such
// builds -- confirm whether an explicit error report is wanted here.
if (! staticData.GetAlignmentOutputFile().empty())
{
m_alignmentOutputStream = new ofstream(staticData.GetAlignmentOutputFile().c_str());
assert(m_alignmentOutputStream->good());
}
}
InputType*IOWrapper::GetInput(InputType* inputType)
@ -211,11 +224,39 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<Fa
}
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
,bool reportSegmentation, bool reportAllFactors)
,bool reportSegmentation, bool reportAllFactors, std::ofstream *alignmentStream)
{
if ( hypo != NULL)
{
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
// Write the word alignment of the derivation that ends in `hypo` as a single
// line of "source-target" index pairs. This runs only when an alignment output
// file is configured in StaticData and the caller passed a non-NULL stream.
// NOTE(review): no locking is visible around this write; if several
// translation tasks share the same stream, confirm that their lines cannot
// interleave or appear in a different order than the input sentences.
if (! StaticData::Instance().GetAlignmentOutputFile().empty() && alignmentStream)
{
// running sum of GetSize() over the target phrases already written
size_t targetOffset = 0;
// Follow the GetPrevHypo() chain from the final hypothesis back to the
// start. The stack reverses that order, so the earliest hypothesis is
// popped first.
std::stack<const Hypothesis *> edges;
const Hypothesis *currentHypo = hypo;
while (currentHypo)
{
edges.push(currentHypo);
currentHypo = currentHypo->GetPrevHypo();
}
while (!edges.empty())
{
const Hypothesis &edge = *edges.top();
edges.pop();
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
// start position of the source span covered by this hypothesis
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
AlignmentInfo::const_iterator it;
for (it = tp.GetAlignmentInfo().begin(); it != tp.GetAlignmentInfo().end(); ++it)
{
// Shift the pair by the offsets above (presumably turning phrase-internal
// positions into sentence positions -- TODO confirm); every pair is
// followed by one space, including the last one on the line.
*alignmentStream << it->first + sourceOffset << "-" << it->second + targetOffset << " ";
}
targetOffset += tp.GetSize();
}
// end this sentence's alignment line; std::endl also flushes the stream
*alignmentStream << std::endl;
}
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors, NULL);
OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
if (reportSegmentation == true
@ -233,6 +274,7 @@ void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge.GetCurrTargetPhrase(), StaticData::Instance().GetOutputFactorOrder(), reportAllFactors);
if (reportSegmentation == true
&& edge.GetCurrTargetPhrase().GetSize() > 0) {
@ -296,7 +338,7 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b
OutputInput(cout, hypo);
cout << "||| ";
}
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors, NULL);
cout << endl;
}
}

View File

@ -65,6 +65,7 @@ protected:
std::ostream *m_nBestStream
,*m_outputWordGraphStream,*m_outputSearchGraphStream;
std::ostream *m_detailedTranslationReportingStream;
std::ofstream *m_alignmentOutputStream;
bool m_surpressSingleBestOutput;
void Initialization(const std::vector<Moses::FactorType> &inputFactorOrder
@ -96,6 +97,11 @@ public:
// Sets m_translationId back to 0.
void ResetTranslationId()
{
  m_translationId = 0;
}
// Accessor for the alignment output stream. The stored pointer is returned as
// is and may be NULL, so callers have to check it before writing.
std::ofstream *GetAlignmentOutputStream() { return m_alignmentOutputStream; }
std::ostream &GetOutputWordGraphStream()
{
return *m_outputWordGraphStream;
@ -114,7 +120,7 @@ public:
IOWrapper *GetIODevice(const Moses::StaticData &staticData);
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors, std::ofstream *alignmentStream);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
const TranslationSystem* system, long translationId);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);

View File

@ -58,7 +58,6 @@ void fix(std::ostream& stream, size_t size) {
stream.precision(size);
}
/**
* Translates a sentence.
**/
@ -69,11 +68,12 @@ class TranslationTask : public Task {
TranslationTask(size_t lineNumber,
InputType* source, OutputCollector* outputCollector, OutputCollector* nbestCollector,
OutputCollector* wordGraphCollector, OutputCollector* searchGraphCollector,
OutputCollector* detailedTranslationCollector) :
OutputCollector* detailedTranslationCollector, std::ofstream *alignmentStream ) :
m_source(source), m_lineNumber(lineNumber),
m_outputCollector(outputCollector), m_nbestCollector(nbestCollector),
m_wordGraphCollector(wordGraphCollector), m_searchGraphCollector(searchGraphCollector),
m_detailedTranslationCollector(detailedTranslationCollector) {}
m_detailedTranslationCollector(detailedTranslationCollector),
m_alignmentStream(alignmentStream) {}
void Run()
{
@ -139,7 +139,8 @@ class TranslationTask : public Task {
bestHypo,
staticData.GetOutputFactorOrder(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors());
staticData.GetReportAllFactors(),
m_alignmentStream);
IFVERBOSE(1) {
debug << "BEST TRANSLATION: " << *bestHypo << endl;
}
@ -231,6 +232,7 @@ class TranslationTask : public Task {
OutputCollector* m_wordGraphCollector;
OutputCollector* m_searchGraphCollector;
OutputCollector* m_detailedTranslationCollector;
std::ofstream *m_alignmentStream;
};
@ -384,7 +386,7 @@ int main(int argc, char** argv) {
}
TranslationTask* task =
new TranslationTask(lineCount,source, outputCollector.get(), nbestCollector.get(), wordGraphCollector.get(),
searchGraphCollector.get(), detailedTranslationCollector.get());
searchGraphCollector.get(), detailedTranslationCollector.get(), ioWrapper->GetAlignmentOutputStream());
#ifdef WITH_THREADS
pool.Submit(task);

View File

@ -133,6 +133,7 @@ Parameter::Parameter()
AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
AddParam("translation-systems", "specify multiple translation systems, each consisting of an id, followed by a set of models ids, eg '0 T1 R1 L0'");
AddParam("show-weights", "print feature weights and exit");
AddParam("alignment-output-file", "print output word alignments into given file");
}
Parameter::~Parameter()

View File

@ -143,6 +143,11 @@ bool StaticData::LoadData(Parameter *parameter)
TRACE_ERR("--print-alignment-info-in-n-best should only be used together with \"--use-alignment-info true\". Continue forcing to false.\n");
m_PrintAlignmentInfoNbest=false;
}
if (m_parameter->GetParam("alignment-output-file").size() > 0)
{
m_alignmentOutputFile = Scan<std::string>(m_parameter->GetParam("alignment-output-file")[0]);
}
// n-best
if (m_parameter->GetParam("n-best-list").size() >= 2)

View File

@ -29,6 +29,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <map>
#include <memory>
#include <utility>
#include <fstream>
#include <string>
#ifdef WITH_THREADS
#include <boost/thread/mutex.hpp>
@ -144,7 +146,8 @@ protected:
bool m_UseAlignmentInfo;
bool m_PrintAlignmentInfo;
bool m_PrintAlignmentInfoNbest;
std::string m_alignmentOutputFile;
std::string m_factorDelimiter; //! by default, |, but it can be changed
size_t m_maxFactorIdx[2]; //! number of factors on source and target side
@ -191,6 +194,7 @@ protected:
size_t m_cubePruningDiversity;
size_t m_ruleLimit;
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
@ -401,6 +405,11 @@ public:
{
return m_detailedTranslationReportingFilePath;
}
//! path stored in m_alignmentOutputFile; callers treat an empty string as
//! "no alignment output requested"
const std::string &GetAlignmentOutputFile() const { return m_alignmentOutputFile; }
bool IsLabeledNBestList() const
{