2010-05-27 12:37:25 +04:00
|
|
|
// $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
2006-07-04 22:04:38 +04:00
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
2010-05-27 12:37:25 +04:00
|
|
|
Copyright (C) 2009 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
2006-07-04 22:04:38 +04:00
|
|
|
***********************************************************************/
|
|
|
|
|
2010-05-27 12:37:25 +04:00
|
|
|
/**
|
|
|
|
* Moses main, for single-threaded and multi-threaded.
|
|
|
|
**/
|
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
#include <exception>
|
2010-05-27 12:37:25 +04:00
|
|
|
#include <fstream>
|
|
|
|
#include <sstream>
|
|
|
|
#include <vector>
|
2006-07-04 22:04:38 +04:00
|
|
|
|
2012-10-05 20:49:52 +04:00
|
|
|
#include "util/usage.hh"
|
|
|
|
|
2006-07-04 22:04:38 +04:00
|
|
|
#ifdef WIN32
|
|
|
|
// Include Visual Leak Detector
|
2011-07-24 03:52:34 +04:00
|
|
|
//#include <vld.h>
|
2006-07-04 22:04:38 +04:00
|
|
|
#endif
|
|
|
|
|
2012-11-13 00:21:32 +04:00
|
|
|
#include "TranslationAnalysis.h"
|
2010-05-27 12:37:25 +04:00
|
|
|
#include "IOWrapper.h"
|
2007-05-16 00:54:39 +04:00
|
|
|
#include "mbr.h"
|
2012-11-13 00:21:32 +04:00
|
|
|
|
|
|
|
#include "moses/Hypothesis.h"
|
|
|
|
#include "moses/Manager.h"
|
|
|
|
#include "moses/StaticData.h"
|
|
|
|
#include "moses/Util.h"
|
|
|
|
#include "moses/Timer.h"
|
|
|
|
#include "moses/ThreadPool.h"
|
|
|
|
#include "moses/OutputCollector.h"
|
2008-09-24 20:48:23 +04:00
|
|
|
|
|
|
|
#ifdef HAVE_PROTOBUF
|
|
|
|
#include "hypergraph.pb.h"
|
2006-07-04 22:04:38 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
using namespace std;
|
2008-10-09 03:51:26 +04:00
|
|
|
using namespace Moses;
|
2012-07-02 20:05:11 +04:00
|
|
|
using namespace MosesCmd;
|
2006-08-08 01:18:13 +04:00
|
|
|
|
2012-07-02 20:05:11 +04:00
|
|
|
namespace MosesCmd
|
|
|
|
{
|
2011-03-02 22:02:07 +03:00
|
|
|
// output floats in fixed notation with three digits after the decimal point
|
2010-11-29 19:44:28 +03:00
|
|
|
static const size_t PRECISION = 3;
|
|
|
|
|
2010-05-27 12:37:25 +04:00
|
|
|
/** Enforce rounding: switch a stream to fixed-point notation.
 *
 * \param stream stream whose floating-point formatting is changed
 * \param size   number of digits written after the decimal point
 *
 * The fixed flag is set through the floatfield mask so that a previously
 * selected notation (e.g. scientific) is cleared rather than combined with
 * fixed, which would otherwise select hexadecimal float output.
 */
void fix(std::ostream& stream, size_t size)
{
  stream.setf(std::ios::fixed, std::ios::floatfield);
  stream.precision(static_cast<std::streamsize>(size));
}
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
/** Translates a sentence.
|
|
|
|
* - calls the search (Manager)
|
|
|
|
* - applies the decision rule
|
|
|
|
* - outputs best translation and additional reporting
|
2010-05-27 12:37:25 +04:00
|
|
|
**/
|
2011-02-24 15:39:29 +03:00
|
|
|
class TranslationTask : public Task
|
|
|
|
{
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
TranslationTask(size_t lineNumber,
|
|
|
|
InputType* source, OutputCollector* outputCollector, OutputCollector* nbestCollector,
|
2011-10-04 19:46:24 +04:00
|
|
|
OutputCollector* latticeSamplesCollector,
|
2011-02-24 15:39:29 +03:00
|
|
|
OutputCollector* wordGraphCollector, OutputCollector* searchGraphCollector,
|
|
|
|
OutputCollector* detailedTranslationCollector,
|
2012-09-21 11:55:37 +04:00
|
|
|
OutputCollector* alignmentInfoCollector,
|
2013-02-15 22:06:54 +04:00
|
|
|
OutputCollector* unknownsCollector,
|
2013-03-04 21:07:37 +04:00
|
|
|
bool outputSearchGraphSLF,
|
|
|
|
bool outputSearchGraphHypergraph) :
|
2011-02-24 15:39:29 +03:00
|
|
|
m_source(source), m_lineNumber(lineNumber),
|
|
|
|
m_outputCollector(outputCollector), m_nbestCollector(nbestCollector),
|
2011-10-04 19:46:24 +04:00
|
|
|
m_latticeSamplesCollector(latticeSamplesCollector),
|
2011-02-24 15:39:29 +03:00
|
|
|
m_wordGraphCollector(wordGraphCollector), m_searchGraphCollector(searchGraphCollector),
|
|
|
|
m_detailedTranslationCollector(detailedTranslationCollector),
|
2012-09-21 11:55:37 +04:00
|
|
|
m_alignmentInfoCollector(alignmentInfoCollector),
|
2013-02-15 22:06:54 +04:00
|
|
|
m_unknownsCollector(unknownsCollector),
|
2013-03-04 21:07:37 +04:00
|
|
|
m_outputSearchGraphSLF(outputSearchGraphSLF),
|
|
|
|
m_outputSearchGraphHypergraph(outputSearchGraphHypergraph) {}
|
2011-02-24 15:39:29 +03:00
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
/** Translate one sentence
|
|
|
|
* gets called by main function implemented at end of this source file */
|
2011-02-24 15:39:29 +03:00
|
|
|
void Run() {
|
2011-03-02 22:02:07 +03:00
|
|
|
|
|
|
|
// report thread number
|
2012-09-22 02:34:48 +04:00
|
|
|
#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
|
2011-02-24 15:39:29 +03:00
|
|
|
TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
|
2010-05-27 12:37:25 +04:00
|
|
|
#endif
|
2011-03-02 22:02:07 +03:00
|
|
|
|
2012-08-10 23:32:00 +04:00
|
|
|
Timer translationTime;
|
|
|
|
translationTime.start();
|
2011-03-02 22:02:07 +03:00
|
|
|
// shorthand for "global data"
|
2011-02-24 15:39:29 +03:00
|
|
|
const StaticData &staticData = StaticData::Instance();
|
2011-03-02 22:02:07 +03:00
|
|
|
// input sentence
|
2011-11-21 15:14:05 +04:00
|
|
|
Sentence sentence();
|
2011-03-02 22:02:07 +03:00
|
|
|
// set translation system
|
2011-02-24 15:39:29 +03:00
|
|
|
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
|
2011-03-02 22:02:07 +03:00
|
|
|
|
|
|
|
// execute the translation
|
|
|
|
// note: this executes the search, resulting in a search graph
|
|
|
|
// we still need to apply the decision rule (MAP, MBR, ...)
|
2012-08-10 23:32:00 +04:00
|
|
|
Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm(), &system);
|
2011-02-24 15:39:29 +03:00
|
|
|
manager.ProcessSentence();
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// output word graph
|
2011-02-24 15:39:29 +03:00
|
|
|
if (m_wordGraphCollector) {
|
|
|
|
ostringstream out;
|
|
|
|
fix(out,PRECISION);
|
|
|
|
manager.GetWordGraph(m_lineNumber, out);
|
|
|
|
m_wordGraphCollector->Write(m_lineNumber, out.str());
|
|
|
|
}
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// output search graph
|
2011-02-24 15:39:29 +03:00
|
|
|
if (m_searchGraphCollector) {
|
|
|
|
ostringstream out;
|
|
|
|
fix(out,PRECISION);
|
|
|
|
manager.OutputSearchGraph(m_lineNumber, out);
|
|
|
|
m_searchGraphCollector->Write(m_lineNumber, out.str());
|
2008-03-18 00:34:19 +03:00
|
|
|
|
2008-09-24 20:48:23 +04:00
|
|
|
#ifdef HAVE_PROTOBUF
|
2011-02-24 15:39:29 +03:00
|
|
|
if (staticData.GetOutputSearchGraphPB()) {
|
|
|
|
ostringstream sfn;
|
|
|
|
sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << m_lineNumber << ".pb" << ends;
|
|
|
|
string fn = sfn.str();
|
|
|
|
VERBOSE(2, "Writing search graph to " << fn << endl);
|
|
|
|
fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
|
|
|
|
manager.SerializeSearchGraphPB(m_lineNumber, output);
|
|
|
|
}
|
2008-09-24 20:48:23 +04:00
|
|
|
#endif
|
2011-03-02 22:02:07 +03:00
|
|
|
}
|
2010-05-27 12:37:25 +04:00
|
|
|
|
2013-02-15 22:06:54 +04:00
|
|
|
// Output search graph in HTK standard lattice format (SLF)
|
2013-03-04 21:07:37 +04:00
|
|
|
if (m_outputSearchGraphSLF) {
|
|
|
|
stringstream fileName;
|
|
|
|
fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << m_lineNumber << ".slf";
|
|
|
|
std::ofstream *file = new std::ofstream;
|
|
|
|
file->open(fileName.str().c_str());
|
|
|
|
if (file->is_open() && file->good()) {
|
2013-02-15 22:06:54 +04:00
|
|
|
ostringstream out;
|
|
|
|
fix(out,PRECISION);
|
|
|
|
manager.OutputSearchGraphAsSLF(m_lineNumber, out);
|
2013-03-04 21:07:37 +04:00
|
|
|
*file << out.str();
|
|
|
|
file -> flush();
|
2013-02-15 22:06:54 +04:00
|
|
|
} else {
|
|
|
|
TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-02-22 21:24:35 +04:00
|
|
|
// Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
|
2013-03-04 21:07:37 +04:00
|
|
|
if (m_outputSearchGraphHypergraph) {
|
|
|
|
stringstream fileName;
|
|
|
|
fileName << staticData.GetParam("output-search-graph-hypergraph")[0] << "/" << m_lineNumber;
|
|
|
|
std::ofstream *file = new std::ofstream;
|
|
|
|
file->open(fileName.str().c_str());
|
|
|
|
if (file->is_open() && file->good()) {
|
2013-02-22 21:24:35 +04:00
|
|
|
ostringstream out;
|
|
|
|
fix(out,PRECISION);
|
|
|
|
manager.OutputSearchGraphAsHypergraph(m_lineNumber, out);
|
2013-03-04 21:07:37 +04:00
|
|
|
*file << out.str();
|
|
|
|
file -> flush();
|
2013-02-22 21:24:35 +04:00
|
|
|
} else {
|
|
|
|
TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl);
|
|
|
|
}
|
2013-03-04 21:07:37 +04:00
|
|
|
file -> close();
|
|
|
|
delete file;
|
2013-02-22 21:24:35 +04:00
|
|
|
}
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// apply decision rule and output best translation(s)
|
2011-02-24 15:39:29 +03:00
|
|
|
if (m_outputCollector) {
|
|
|
|
ostringstream out;
|
|
|
|
ostringstream debug;
|
|
|
|
fix(debug,PRECISION);
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// all derivations - send them to debug stream
|
2011-02-24 15:39:29 +03:00
|
|
|
if (staticData.PrintAllDerivations()) {
|
|
|
|
manager.PrintAllDerivations(m_lineNumber, debug);
|
|
|
|
}
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// MAP decoding: best hypothesis
|
2011-02-24 15:39:29 +03:00
|
|
|
const Hypothesis* bestHypo = NULL;
|
2011-03-02 22:02:07 +03:00
|
|
|
if (!staticData.UseMBR())
|
2013-03-13 16:12:33 +04:00
|
|
|
{
|
2011-02-24 15:39:29 +03:00
|
|
|
bestHypo = manager.GetBestHypothesis();
|
|
|
|
if (bestHypo) {
|
|
|
|
if (staticData.IsPathRecoveryEnabled()) {
|
|
|
|
OutputInput(out, bestHypo);
|
|
|
|
out << "||| ";
|
|
|
|
}
|
2013-02-11 22:01:33 +04:00
|
|
|
if (staticData.GetParam("print-id").size() && Scan<bool>(staticData.GetParam("print-id")[0]) ) {
|
|
|
|
out << m_source->GetTranslationId() << " ";
|
|
|
|
}
|
|
|
|
|
2012-04-19 01:09:02 +04:00
|
|
|
OutputBestSurface(
|
2011-02-24 15:39:29 +03:00
|
|
|
out,
|
|
|
|
bestHypo,
|
|
|
|
staticData.GetOutputFactorOrder(),
|
|
|
|
staticData.GetReportSegmentation(),
|
|
|
|
staticData.GetReportAllFactors());
|
2013-03-13 16:12:33 +04:00
|
|
|
if (staticData.PrintAlignmentInfo()) {
|
|
|
|
out << "||| ";
|
|
|
|
OutputAlignment(out, bestHypo);
|
|
|
|
}
|
|
|
|
|
2011-02-24 15:39:29 +03:00
|
|
|
OutputAlignment(m_alignmentInfoCollector, m_lineNumber, bestHypo);
|
|
|
|
IFVERBOSE(1) {
|
|
|
|
debug << "BEST TRANSLATION: " << *bestHypo << endl;
|
|
|
|
}
|
2010-02-18 17:15:34 +03:00
|
|
|
}
|
2011-02-24 15:39:29 +03:00
|
|
|
out << endl;
|
2013-03-13 16:12:33 +04:00
|
|
|
}
|
2011-03-02 22:02:07 +03:00
|
|
|
|
|
|
|
// MBR decoding (n-best MBR, lattice MBR, consensus)
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// we first need the n-best translations
|
2011-02-24 15:39:29 +03:00
|
|
|
size_t nBestSize = staticData.GetMBRSize();
|
|
|
|
if (nBestSize <= 0) {
|
|
|
|
cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
TrellisPathList nBestList;
|
|
|
|
manager.CalcNBest(nBestSize, nBestList,true);
|
|
|
|
VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
|
|
|
|
IFVERBOSE(2) {
|
|
|
|
PrintUserTime("calculated n-best list for (L)MBR decoding");
|
2010-02-18 17:15:34 +03:00
|
|
|
}
|
2010-05-27 12:37:25 +04:00
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// lattice MBR
|
2011-02-24 15:39:29 +03:00
|
|
|
if (staticData.UseLatticeMBR()) {
|
|
|
|
if (m_nbestCollector) {
|
|
|
|
//lattice mbr nbest
|
|
|
|
vector<LatticeMBRSolution> solutions;
|
|
|
|
size_t n = min(nBestSize, staticData.GetNBestSize());
|
|
|
|
getLatticeMBRNBest(manager,nBestList,solutions,n);
|
|
|
|
ostringstream out;
|
|
|
|
OutputLatticeMBRNBest(out, solutions,m_lineNumber);
|
|
|
|
m_nbestCollector->Write(m_lineNumber, out.str());
|
|
|
|
} else {
|
|
|
|
//Lattice MBR decoding
|
|
|
|
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
|
|
|
|
OutputBestHypo(mbrBestHypo, m_lineNumber, staticData.GetReportSegmentation(),
|
|
|
|
staticData.GetReportAllFactors(),out);
|
|
|
|
IFVERBOSE(2) {
|
|
|
|
PrintUserTime("finished Lattice MBR decoding");
|
|
|
|
}
|
|
|
|
}
|
2011-03-02 22:02:07 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// consensus decoding
|
|
|
|
else if (staticData.UseConsensusDecoding()) {
|
2011-02-24 15:39:29 +03:00
|
|
|
const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
|
|
|
|
OutputBestHypo(conBestHypo, m_lineNumber,
|
|
|
|
staticData.GetReportSegmentation(),
|
|
|
|
staticData.GetReportAllFactors(),out);
|
|
|
|
OutputAlignment(m_alignmentInfoCollector, m_lineNumber, conBestHypo);
|
|
|
|
IFVERBOSE(2) {
|
|
|
|
PrintUserTime("finished Consensus decoding");
|
|
|
|
}
|
2011-03-02 22:02:07 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// n-best MBR decoding
|
|
|
|
else {
|
2011-02-24 15:39:29 +03:00
|
|
|
const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList);
|
|
|
|
OutputBestHypo(mbrBestHypo, m_lineNumber,
|
|
|
|
staticData.GetReportSegmentation(),
|
|
|
|
staticData.GetReportAllFactors(),out);
|
|
|
|
OutputAlignment(m_alignmentInfoCollector, m_lineNumber, mbrBestHypo);
|
|
|
|
IFVERBOSE(2) {
|
|
|
|
PrintUserTime("finished MBR decoding");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-03-02 22:02:07 +03:00
|
|
|
|
|
|
|
// report best translation to output collector
|
2011-02-24 15:39:29 +03:00
|
|
|
m_outputCollector->Write(m_lineNumber,out.str(),debug.str());
|
|
|
|
}
|
2011-03-02 22:02:07 +03:00
|
|
|
|
|
|
|
// output n-best list
|
2011-02-24 15:39:29 +03:00
|
|
|
if (m_nbestCollector && !staticData.UseLatticeMBR()) {
|
|
|
|
TrellisPathList nBestList;
|
|
|
|
ostringstream out;
|
|
|
|
manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
|
2012-12-07 17:34:44 +04:00
|
|
|
OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), *manager.GetTranslationSystem(), m_lineNumber,
|
2012-04-19 01:09:02 +04:00
|
|
|
staticData.GetReportSegmentation());
|
2011-02-24 15:39:29 +03:00
|
|
|
m_nbestCollector->Write(m_lineNumber, out.str());
|
|
|
|
}
|
2010-05-27 12:37:25 +04:00
|
|
|
|
2011-10-04 19:46:24 +04:00
|
|
|
//lattice samples
|
|
|
|
if (m_latticeSamplesCollector) {
|
|
|
|
TrellisPathList latticeSamples;
|
|
|
|
ostringstream out;
|
|
|
|
manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
|
2012-12-07 17:34:44 +04:00
|
|
|
OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), *manager.GetTranslationSystem(), m_lineNumber,
|
2012-04-19 01:09:02 +04:00
|
|
|
staticData.GetReportSegmentation());
|
2011-10-04 19:46:24 +04:00
|
|
|
m_latticeSamplesCollector->Write(m_lineNumber, out.str());
|
|
|
|
}
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// detailed translation reporting
|
2011-02-24 15:39:29 +03:00
|
|
|
if (m_detailedTranslationCollector) {
|
|
|
|
ostringstream out;
|
|
|
|
fix(out,PRECISION);
|
|
|
|
TranslationAnalysis::PrintTranslationAnalysis(manager.GetTranslationSystem(), out, manager.GetBestHypothesis());
|
|
|
|
m_detailedTranslationCollector->Write(m_lineNumber,out.str());
|
|
|
|
}
|
|
|
|
|
2012-09-21 11:55:37 +04:00
|
|
|
//list of unknown words
|
|
|
|
if (m_unknownsCollector) {
|
|
|
|
const vector<Phrase*>& unknowns = manager.getSntTranslationOptions()->GetUnknownSources();
|
|
|
|
ostringstream out;
|
|
|
|
for (size_t i = 0; i < unknowns.size(); ++i) {
|
|
|
|
out << *(unknowns[i]);
|
|
|
|
}
|
|
|
|
out << endl;
|
|
|
|
m_unknownsCollector->Write(m_lineNumber, out.str());
|
|
|
|
}
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// report additional statistics
|
2011-02-24 15:39:29 +03:00
|
|
|
IFVERBOSE(2) {
|
|
|
|
PrintUserTime("Sentence Decoding Time:");
|
|
|
|
}
|
|
|
|
manager.CalcDecoderStatistics();
|
2012-08-10 23:32:00 +04:00
|
|
|
|
|
|
|
VERBOSE(1, "Line " << m_lineNumber << ": Translation took " << translationTime << " seconds total" << endl);
|
2011-02-24 15:39:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
~TranslationTask() {
|
|
|
|
delete m_source;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
InputType* m_source;
|
|
|
|
size_t m_lineNumber;
|
|
|
|
OutputCollector* m_outputCollector;
|
|
|
|
OutputCollector* m_nbestCollector;
|
2011-10-04 19:46:24 +04:00
|
|
|
OutputCollector* m_latticeSamplesCollector;
|
2011-02-24 15:39:29 +03:00
|
|
|
OutputCollector* m_wordGraphCollector;
|
|
|
|
OutputCollector* m_searchGraphCollector;
|
|
|
|
OutputCollector* m_detailedTranslationCollector;
|
|
|
|
OutputCollector* m_alignmentInfoCollector;
|
2012-09-21 11:55:37 +04:00
|
|
|
OutputCollector* m_unknownsCollector;
|
2013-03-04 21:07:37 +04:00
|
|
|
bool m_outputSearchGraphSLF;
|
|
|
|
bool m_outputSearchGraphHypergraph;
|
2011-02-24 15:39:29 +03:00
|
|
|
std::ofstream *m_alignmentStream;
|
2010-05-27 12:37:25 +04:00
|
|
|
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2011-02-24 15:39:29 +03:00
|
|
|
static void PrintFeatureWeight(const FeatureFunction* ff)
|
|
|
|
{
|
2011-01-05 16:49:44 +03:00
|
|
|
size_t numScoreComps = ff->GetNumScoreComponents();
|
2013-02-22 00:03:35 +04:00
|
|
|
if (numScoreComps != FeatureFunction::unlimited) {
|
2011-01-05 16:49:44 +03:00
|
|
|
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
|
2013-03-13 22:26:24 +04:00
|
|
|
cout << ff->GetScoreProducerDescription() << ":";
|
2012-12-17 21:17:44 +04:00
|
|
|
for (size_t i = 0; i < numScoreComps; ++i) {
|
|
|
|
cout << " " << values[i];
|
|
|
|
}
|
|
|
|
cout << endl;
|
2010-11-29 19:44:28 +03:00
|
|
|
}
|
2012-01-18 16:26:51 +04:00
|
|
|
else {
|
|
|
|
if (ff->GetSparseProducerWeight() == 1)
|
2013-03-13 22:26:24 +04:00
|
|
|
cout << ff->GetScoreProducerDescription() << ": sparse" << endl;
|
2012-01-18 16:26:51 +04:00
|
|
|
else
|
2013-03-13 22:26:24 +04:00
|
|
|
cout << ff->GetScoreProducerDescription() << ": " << ff->GetSparseProducerWeight() << endl;
|
2012-01-20 19:35:55 +04:00
|
|
|
}
|
|
|
|
}
|
2010-11-29 19:44:28 +03:00
|
|
|
|
2011-02-24 15:39:29 +03:00
|
|
|
static void ShowWeights()
|
|
|
|
{
|
2012-11-09 16:11:49 +04:00
|
|
|
//TODO: Find a way of ensuring this order is synced with the nbest
|
2010-11-29 19:44:28 +03:00
|
|
|
fix(cout,6);
|
|
|
|
const StaticData& staticData = StaticData::Instance();
|
|
|
|
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
|
2012-12-31 04:57:21 +04:00
|
|
|
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
|
|
|
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
2012-12-06 21:13:00 +04:00
|
|
|
|
2010-11-29 19:44:28 +03:00
|
|
|
for (size_t i = 0; i < sff.size(); ++i) {
|
2012-12-15 23:20:07 +04:00
|
|
|
const StatefulFeatureFunction *ff = sff[i];
|
|
|
|
if (ff->IsTuneable()) {
|
|
|
|
PrintFeatureWeight(ff);
|
|
|
|
}
|
2010-11-29 19:44:28 +03:00
|
|
|
}
|
|
|
|
for (size_t i = 0; i < slf.size(); ++i) {
|
2012-12-15 23:20:07 +04:00
|
|
|
const StatelessFeatureFunction *ff = slf[i];
|
|
|
|
if (ff->IsTuneable()) {
|
|
|
|
PrintFeatureWeight(ff);
|
|
|
|
}
|
2012-11-09 16:11:49 +04:00
|
|
|
}
|
2010-11-29 19:44:28 +03:00
|
|
|
}
|
|
|
|
|
2013-02-23 01:20:03 +04:00
|
|
|
size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream)
|
|
|
|
{
|
|
|
|
size_t numScoreComps = ff->GetNumScoreComponents();
|
2013-02-24 04:31:29 +04:00
|
|
|
if (numScoreComps != FeatureFunction::unlimited) {
|
2013-02-23 01:20:03 +04:00
|
|
|
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
|
|
|
|
if (numScoreComps > 1) {
|
|
|
|
for (size_t i = 0; i < numScoreComps; ++i) {
|
2013-02-24 04:31:29 +04:00
|
|
|
outputSearchGraphStream << ff->GetScoreProducerDescription()
|
2013-02-23 01:20:03 +04:00
|
|
|
<< i
|
|
|
|
<< "=" << values[i] << endl;
|
|
|
|
}
|
|
|
|
} else {
|
2013-02-24 04:31:29 +04:00
|
|
|
outputSearchGraphStream << ff->GetScoreProducerDescription()
|
2013-02-23 01:20:03 +04:00
|
|
|
<< "=" << values[0] << endl;
|
|
|
|
}
|
|
|
|
return index+numScoreComps;
|
|
|
|
} else {
|
|
|
|
cerr << "Sparse features are not yet supported when outputting hypergraph format" << endl;
|
|
|
|
assert(false);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
|
|
|
|
{
|
|
|
|
outputSearchGraphStream.setf(std::ios::fixed);
|
|
|
|
outputSearchGraphStream.precision(6);
|
|
|
|
|
|
|
|
const StaticData& staticData = StaticData::Instance();
|
|
|
|
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
|
2013-02-24 04:31:29 +04:00
|
|
|
const vector<const StatelessFeatureFunction*>& slf =StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
|
|
|
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
2013-02-23 01:20:03 +04:00
|
|
|
size_t featureIndex = 1;
|
|
|
|
for (size_t i = 0; i < sff.size(); ++i) {
|
|
|
|
featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, sff[i], outputSearchGraphStream);
|
|
|
|
}
|
|
|
|
for (size_t i = 0; i < slf.size(); ++i) {
|
2013-02-24 04:31:29 +04:00
|
|
|
/*
|
2013-02-23 01:20:03 +04:00
|
|
|
if (slf[i]->GetScoreProducerWeightShortName() != "u" &&
|
|
|
|
slf[i]->GetScoreProducerWeightShortName() != "tm" &&
|
|
|
|
slf[i]->GetScoreProducerWeightShortName() != "I" &&
|
|
|
|
slf[i]->GetScoreProducerWeightShortName() != "g")
|
2013-02-24 04:31:29 +04:00
|
|
|
*/
|
2013-02-23 01:20:03 +04:00
|
|
|
{
|
|
|
|
featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, slf[i], outputSearchGraphStream);
|
|
|
|
}
|
|
|
|
}
|
2013-02-24 04:31:29 +04:00
|
|
|
const vector<PhraseDictionary*>& pds = staticData.GetPhraseDictionaries();
|
2013-02-23 01:20:03 +04:00
|
|
|
for( size_t i=0; i<pds.size(); i++ ) {
|
|
|
|
featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, pds[i], outputSearchGraphStream);
|
|
|
|
}
|
2013-02-24 04:31:29 +04:00
|
|
|
const vector<const GenerationDictionary*>& gds = staticData.GetGenerationDictionaries();
|
2013-02-23 01:20:03 +04:00
|
|
|
for( size_t i=0; i<gds.size(); i++ ) {
|
|
|
|
featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, gds[i], outputSearchGraphStream);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-07-02 20:05:11 +04:00
|
|
|
} //namespace
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
/** main function of the command line version of the decoder **/
|
2011-02-24 15:39:29 +03:00
|
|
|
int main(int argc, char** argv)
|
|
|
|
{
|
2012-01-13 19:20:42 +04:00
|
|
|
try {
|
|
|
|
|
2010-05-27 12:37:25 +04:00
|
|
|
#ifdef HAVE_PROTOBUF
|
2012-01-13 19:20:42 +04:00
|
|
|
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
2010-05-27 12:37:25 +04:00
|
|
|
#endif
|
2011-03-02 22:02:07 +03:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// echo command line, if verbose
|
|
|
|
IFVERBOSE(1) {
|
|
|
|
TRACE_ERR("command: ");
|
|
|
|
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
|
|
|
|
TRACE_ERR(endl);
|
2010-05-27 12:37:25 +04:00
|
|
|
}
|
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// set number of significant decimals in output
|
|
|
|
fix(cout,PRECISION);
|
|
|
|
fix(cerr,PRECISION);
|
2008-10-30 22:44:54 +03:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// load all the settings into the Parameter class
|
|
|
|
// (stores them as strings, or array of strings)
|
|
|
|
Parameter* params = new Parameter();
|
|
|
|
if (!params->LoadParam(argc,argv)) {
|
|
|
|
exit(1);
|
|
|
|
}
|
2011-02-24 15:39:29 +03:00
|
|
|
|
2011-08-30 16:25:50 +04:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// initialize all "global" variables, which are stored in StaticData
|
|
|
|
// note: this also loads models such as the language model, etc.
|
2012-07-31 00:07:19 +04:00
|
|
|
if (!StaticData::LoadDataStatic(params, argv[0])) {
|
2012-01-13 19:20:42 +04:00
|
|
|
exit(1);
|
|
|
|
}
|
2007-01-22 19:21:28 +03:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// setting "-show-weights" -> just dump out weights and exit
|
|
|
|
if (params->isParamSpecified("show-weights")) {
|
|
|
|
ShowWeights();
|
|
|
|
exit(0);
|
|
|
|
}
|
2011-08-18 01:13:21 +04:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// shorthand for accessing information in StaticData
|
|
|
|
const StaticData& staticData = StaticData::Instance();
|
2011-03-02 22:02:07 +03:00
|
|
|
|
2011-09-23 02:29:56 +04:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
//initialise random numbers
|
|
|
|
srand(time(NULL));
|
2011-10-04 19:46:24 +04:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// set up read/writing class
|
2012-06-29 07:19:28 +04:00
|
|
|
IOWrapper* ioWrapper = GetIOWrapper(staticData);
|
2012-01-13 19:20:42 +04:00
|
|
|
if (!ioWrapper) {
|
|
|
|
cerr << "Error; Failed to create IO object" << endl;
|
|
|
|
exit(1);
|
|
|
|
}
|
2011-02-24 15:39:29 +03:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// check on weights
|
2012-05-25 00:11:35 +04:00
|
|
|
const ScoreComponentCollection& weights = staticData.GetAllWeights();
|
2012-01-13 19:20:42 +04:00
|
|
|
IFVERBOSE(2) {
|
2012-05-25 00:11:35 +04:00
|
|
|
TRACE_ERR("The global weight vector looks like this: ");
|
|
|
|
TRACE_ERR(weights);
|
2012-01-13 19:20:42 +04:00
|
|
|
TRACE_ERR("\n");
|
2010-05-27 12:37:25 +04:00
|
|
|
}
|
2013-02-23 01:20:03 +04:00
|
|
|
if (staticData.GetOutputSearchGraphHypergraph() && staticData.GetParam("output-search-graph-hypergraph").size() > 1) {
|
|
|
|
ofstream* weightsOut = ioWrapper->GetOutputSearchGraphHypergraphWeightsStream();
|
|
|
|
OutputFeatureWeightsForHypergraph(*weightsOut);
|
|
|
|
weightsOut->flush();
|
|
|
|
weightsOut->close();
|
|
|
|
delete weightsOut;
|
|
|
|
}
|
|
|
|
|
2011-02-24 15:39:29 +03:00
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
// initialize output streams
|
|
|
|
// note: we can't just write to STDOUT or files
|
|
|
|
// because multithreading may return sentences in shuffled order
|
|
|
|
auto_ptr<OutputCollector> outputCollector; // for translations
|
|
|
|
auto_ptr<OutputCollector> nbestCollector; // for n-best lists
|
|
|
|
auto_ptr<OutputCollector> latticeSamplesCollector; //for lattice samples
|
|
|
|
auto_ptr<ofstream> nbestOut;
|
|
|
|
auto_ptr<ofstream> latticeSamplesOut;
|
|
|
|
size_t nbestSize = staticData.GetNBestSize();
|
|
|
|
string nbestFile = staticData.GetNBestFilePath();
|
|
|
|
bool output1best = true;
|
|
|
|
if (nbestSize) {
|
|
|
|
if (nbestFile == "-" || nbestFile == "/dev/stdout") {
|
|
|
|
// nbest to stdout, no 1-best
|
|
|
|
nbestCollector.reset(new OutputCollector());
|
|
|
|
output1best = false;
|
|
|
|
} else {
|
|
|
|
// nbest to file, 1-best to stdout
|
|
|
|
nbestOut.reset(new ofstream(nbestFile.c_str()));
|
|
|
|
if (!nbestOut->good()) {
|
|
|
|
TRACE_ERR("ERROR: Failed to open " << nbestFile << " for nbest lists" << endl);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
nbestCollector.reset(new OutputCollector(nbestOut.get()));
|
2011-10-04 19:46:24 +04:00
|
|
|
}
|
2010-05-27 12:37:25 +04:00
|
|
|
}
|
2012-01-13 19:20:42 +04:00
|
|
|
size_t latticeSamplesSize = staticData.GetLatticeSamplesSize();
|
|
|
|
string latticeSamplesFile = staticData.GetLatticeSamplesFilePath();
|
|
|
|
if (latticeSamplesSize) {
|
|
|
|
if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") {
|
|
|
|
latticeSamplesCollector.reset(new OutputCollector());
|
|
|
|
output1best = false;
|
|
|
|
} else {
|
|
|
|
latticeSamplesOut.reset(new ofstream(latticeSamplesFile.c_str()));
|
|
|
|
if (!latticeSamplesOut->good()) {
|
|
|
|
TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
latticeSamplesCollector.reset(new OutputCollector(latticeSamplesOut.get()));
|
2011-10-04 19:46:24 +04:00
|
|
|
}
|
|
|
|
}
|
2012-01-13 19:20:42 +04:00
|
|
|
if (output1best) {
|
|
|
|
outputCollector.reset(new OutputCollector());
|
|
|
|
}
|
|
|
|
|
|
|
|
// initialize stream for word graph (aka: output lattice)
|
|
|
|
auto_ptr<OutputCollector> wordGraphCollector;
|
|
|
|
if (staticData.GetOutputWordGraph()) {
|
|
|
|
wordGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputWordGraphStream())));
|
|
|
|
}
|
|
|
|
|
|
|
|
// initialize stream for search graph
|
|
|
|
// note: this is essentially the same as above, but in a different format
|
|
|
|
auto_ptr<OutputCollector> searchGraphCollector;
|
|
|
|
if (staticData.GetOutputSearchGraph()) {
|
|
|
|
searchGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputSearchGraphStream())));
|
|
|
|
}
|
|
|
|
|
|
|
|
// initialize stram for details about the decoder run
|
|
|
|
auto_ptr<OutputCollector> detailedTranslationCollector;
|
|
|
|
if (staticData.IsDetailedTranslationReportingEnabled()) {
|
|
|
|
detailedTranslationCollector.reset(new OutputCollector(&(ioWrapper->GetDetailedTranslationReportingStream())));
|
|
|
|
}
|
|
|
|
|
|
|
|
// initialize stram for word alignment between input and output
|
|
|
|
auto_ptr<OutputCollector> alignmentInfoCollector;
|
|
|
|
if (!staticData.GetAlignmentOutputFile().empty()) {
|
|
|
|
alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
|
|
|
|
}
|
2012-09-21 11:55:37 +04:00
|
|
|
|
|
|
|
//initialise stream for unknown (oov) words
|
|
|
|
auto_ptr<OutputCollector> unknownsCollector;
|
|
|
|
auto_ptr<ofstream> unknownsStream;
|
|
|
|
if (!staticData.GetOutputUnknownsFile().empty()) {
|
|
|
|
unknownsStream.reset(new ofstream(staticData.GetOutputUnknownsFile().c_str()));
|
|
|
|
if (!unknownsStream->good()) {
|
|
|
|
TRACE_ERR("Unable to open " << staticData.GetOutputUnknownsFile() << " for unknowns");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
unknownsCollector.reset(new OutputCollector(unknownsStream.get()));
|
|
|
|
}
|
2012-01-13 19:20:42 +04:00
|
|
|
|
2011-09-23 02:29:56 +04:00
|
|
|
#ifdef WITH_THREADS
|
2012-01-13 19:20:42 +04:00
|
|
|
ThreadPool pool(staticData.ThreadCount());
|
2011-09-23 02:29:56 +04:00
|
|
|
#endif
|
2012-01-13 19:20:42 +04:00
|
|
|
|
|
|
|
// main loop over set of input sentences
|
|
|
|
InputType* source = NULL;
|
2013-02-14 00:52:40 +04:00
|
|
|
size_t lineCount = staticData.GetStartTranslationId();
|
2012-01-13 19:20:42 +04:00
|
|
|
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
|
|
|
|
IFVERBOSE(1) {
|
|
|
|
ResetUserTime();
|
|
|
|
}
|
|
|
|
// set up task of translating one sentence
|
|
|
|
TranslationTask* task =
|
|
|
|
new TranslationTask(lineCount,source, outputCollector.get(),
|
|
|
|
nbestCollector.get(),
|
|
|
|
latticeSamplesCollector.get(),
|
|
|
|
wordGraphCollector.get(),
|
|
|
|
searchGraphCollector.get(),
|
|
|
|
detailedTranslationCollector.get(),
|
2012-09-21 11:55:37 +04:00
|
|
|
alignmentInfoCollector.get(),
|
2013-02-15 22:06:54 +04:00
|
|
|
unknownsCollector.get(),
|
2013-03-04 21:07:37 +04:00
|
|
|
staticData.GetOutputSearchGraphSLF(),
|
|
|
|
staticData.GetOutputSearchGraphHypergraph());
|
2012-01-13 19:20:42 +04:00
|
|
|
// execute task
|
2010-05-27 12:37:25 +04:00
|
|
|
#ifdef WITH_THREADS
|
2012-01-13 19:20:42 +04:00
|
|
|
pool.Submit(task);
|
2010-05-27 12:37:25 +04:00
|
|
|
#else
|
2012-01-13 19:20:42 +04:00
|
|
|
task->Run();
|
2012-03-30 23:25:42 +04:00
|
|
|
delete task;
|
2010-05-27 12:37:25 +04:00
|
|
|
#endif
|
2012-01-13 19:20:42 +04:00
|
|
|
|
|
|
|
source = NULL; //make sure it doesn't get deleted
|
|
|
|
++lineCount;
|
|
|
|
}
|
|
|
|
|
2011-03-02 22:02:07 +03:00
|
|
|
// we are done, finishing up
|
2010-05-27 12:37:25 +04:00
|
|
|
#ifdef WITH_THREADS
|
2012-01-13 19:20:42 +04:00
|
|
|
pool.Stop(true); //flush remaining jobs
|
2010-05-27 12:37:25 +04:00
|
|
|
#endif
|
|
|
|
|
2012-01-13 19:20:42 +04:00
|
|
|
} catch (const std::exception &e) {
|
|
|
|
std::cerr << "Exception: " << e.what() << std::endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
|
2012-10-05 20:49:52 +04:00
|
|
|
IFVERBOSE(1) util::PrintUsage(std::cerr);
|
|
|
|
|
2010-05-27 12:37:25 +04:00
|
|
|
#ifndef EXIT_RETURN
|
2011-02-24 15:39:29 +03:00
|
|
|
//This avoids that destructors are called (it can take a long time)
|
|
|
|
exit(EXIT_SUCCESS);
|
2010-05-27 12:37:25 +04:00
|
|
|
#else
|
2011-02-24 15:39:29 +03:00
|
|
|
return EXIT_SUCCESS;
|
2010-05-27 12:37:25 +04:00
|
|
|
#endif
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|