2014-09-30 15:25:36 +04:00
# include "TranslationTask.h"
# include "moses/StaticData.h"
# include "moses/Sentence.h"
# include "moses/IOWrapper.h"
# include "moses/TranslationAnalysis.h"
# include "moses/TypeDef.h"
# include "moses/Util.h"
# include "moses/InputType.h"
# include "moses/OutputCollector.h"
# include "mbr.h"
using namespace std ;
using namespace Moses ;
namespace MosesCmd
{
2014-10-07 22:16:30 +04:00
// Creates a translation task for one input.
//
// source               - input to translate; it is deleted by the destructor
// ioWrapper            - provides the output collectors that Run() writes to
// outputSearchGraphSLF - when true, Run() writes the search graph as an SLF file
// hypergraphOutput     - hypergraph writer; Run() only uses it when non-empty
TranslationTask::TranslationTask(InputType* source,
                                 MosesCmd::IOWrapper& ioWrapper,
                                 bool outputSearchGraphSLF,
                                 boost::shared_ptr<HypergraphOutput<Manager> > hypergraphOutput)
  : m_source(source)
  , m_ioWrapper(ioWrapper)
  , m_outputSearchGraphSLF(outputSearchGraphSLF)
  , m_hypergraphOutput(hypergraphOutput)
{
}
2014-09-30 15:47:28 +04:00
// Destroys the task and frees the input object that was passed to the
// constructor.
TranslationTask::~TranslationTask()
{
  delete m_source;
}
2014-09-30 15:25:36 +04:00
void TranslationTask : : Run ( ) {
// shorthand for "global data"
const StaticData & staticData = StaticData : : Instance ( ) ;
// input sentence
Sentence sentence ;
// report wall time spent on translation
Timer translationTime ;
translationTime . start ( ) ;
// report thread number
# if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
2014-10-07 22:16:30 +04:00
TRACE_ERR ( " Translating line " < < m_source - > GetTranslationId ( ) < < " in thread id " < < pthread_self ( ) < < endl ) ;
2014-09-30 15:25:36 +04:00
# endif
// execute the translation
// note: this executes the search, resulting in a search graph
// we still need to apply the decision rule (MAP, MBR, ...)
Timer initTime ;
initTime . start ( ) ;
2014-10-07 22:16:30 +04:00
Manager manager ( * m_source , staticData . GetSearchAlgorithm ( ) ) ;
VERBOSE ( 1 , " Line " < < m_source - > GetTranslationId ( ) < < " : Initialize search took " < < initTime < < " seconds total " < < endl ) ;
2014-09-30 15:25:36 +04:00
manager . ProcessSentence ( ) ;
// we are done with search, let's look what we got
Timer additionalReportingTime ;
additionalReportingTime . start ( ) ;
// output word graph
2014-10-02 17:57:04 +04:00
if ( m_ioWrapper . GetWordGraphCollector ( ) ) {
2014-09-30 15:25:36 +04:00
ostringstream out ;
fix ( out , PRECISION ) ;
2014-10-07 22:16:30 +04:00
manager . GetWordGraph ( m_source - > GetTranslationId ( ) , out ) ;
m_ioWrapper . GetWordGraphCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) ) ;
2014-09-30 15:25:36 +04:00
}
// output search graph
2014-10-02 15:20:49 +04:00
if ( m_ioWrapper . GetSearchGraphOutputCollector ( ) ) {
2014-09-30 15:25:36 +04:00
ostringstream out ;
fix ( out , PRECISION ) ;
2014-10-07 22:16:30 +04:00
manager . OutputSearchGraph ( m_source - > GetTranslationId ( ) , out ) ;
m_ioWrapper . GetSearchGraphOutputCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) ) ;
2014-09-30 15:25:36 +04:00
# ifdef HAVE_PROTOBUF
if ( staticData . GetOutputSearchGraphPB ( ) ) {
ostringstream sfn ;
2014-10-07 22:16:30 +04:00
sfn < < staticData . GetParam ( " output-search-graph-pb " ) [ 0 ] < < ' / ' < < m_source - > GetTranslationId ( ) < < " .pb " < < ends ;
2014-09-30 15:25:36 +04:00
string fn = sfn . str ( ) ;
VERBOSE ( 2 , " Writing search graph to " < < fn < < endl ) ;
fstream output ( fn . c_str ( ) , ios : : trunc | ios : : binary | ios : : out ) ;
2014-10-07 22:16:30 +04:00
manager . SerializeSearchGraphPB ( m_source - > GetTranslationId ( ) , output ) ;
2014-09-30 15:25:36 +04:00
}
# endif
}
// Output search graph in HTK standard lattice format (SLF)
if ( m_outputSearchGraphSLF ) {
stringstream fileName ;
2014-10-07 22:16:30 +04:00
fileName < < staticData . GetParam ( " output-search-graph-slf " ) [ 0 ] < < " / " < < m_source - > GetTranslationId ( ) < < " .slf " ;
2014-09-30 15:25:36 +04:00
ofstream * file = new ofstream ;
file - > open ( fileName . str ( ) . c_str ( ) ) ;
if ( file - > is_open ( ) & & file - > good ( ) ) {
ostringstream out ;
fix ( out , PRECISION ) ;
2014-10-07 22:16:30 +04:00
manager . OutputSearchGraphAsSLF ( m_source - > GetTranslationId ( ) , out ) ;
2014-09-30 15:25:36 +04:00
* file < < out . str ( ) ;
file - > flush ( ) ;
} else {
2014-10-07 22:16:30 +04:00
TRACE_ERR ( " Cannot output HTK standard lattice for line " < < m_source - > GetTranslationId ( ) < < " because the output file is not open or not ready for writing " < < endl ) ;
2014-09-30 15:25:36 +04:00
}
delete file ;
}
// Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
if ( m_hypergraphOutput . get ( ) ) {
m_hypergraphOutput - > Write ( manager ) ;
}
additionalReportingTime . stop ( ) ;
// apply decision rule and output best translation(s)
2014-10-01 20:43:51 +04:00
if ( m_ioWrapper . GetSingleBestOutputCollector ( ) ) {
2014-09-30 15:25:36 +04:00
ostringstream out ;
ostringstream debug ;
fix ( debug , PRECISION ) ;
// all derivations - send them to debug stream
if ( staticData . PrintAllDerivations ( ) ) {
additionalReportingTime . start ( ) ;
2014-10-07 22:16:30 +04:00
manager . PrintAllDerivations ( m_source - > GetTranslationId ( ) , debug ) ;
2014-09-30 15:25:36 +04:00
additionalReportingTime . stop ( ) ;
}
Timer decisionRuleTime ;
decisionRuleTime . start ( ) ;
// MAP decoding: best hypothesis
const Hypothesis * bestHypo = NULL ;
if ( ! staticData . UseMBR ( ) ) {
bestHypo = manager . GetBestHypothesis ( ) ;
if ( bestHypo ) {
if ( StaticData : : Instance ( ) . GetOutputHypoScore ( ) ) {
out < < bestHypo - > GetTotalScore ( ) < < ' ' ;
}
if ( staticData . IsPathRecoveryEnabled ( ) ) {
OutputInput ( out , bestHypo ) ;
out < < " ||| " ;
}
if ( staticData . GetParam ( " print-id " ) . size ( ) & & Scan < bool > ( staticData . GetParam ( " print-id " ) [ 0 ] ) ) {
out < < m_source - > GetTranslationId ( ) < < " " ;
}
if ( staticData . GetReportSegmentation ( ) = = 2 ) {
manager . GetOutputLanguageModelOrder ( out , bestHypo ) ;
}
OutputBestSurface (
out ,
bestHypo ,
staticData . GetOutputFactorOrder ( ) ,
staticData . GetReportSegmentation ( ) ,
staticData . GetReportAllFactors ( ) ) ;
if ( staticData . PrintAlignmentInfo ( ) ) {
out < < " ||| " ;
OutputAlignment ( out , bestHypo ) ;
}
2014-10-07 22:16:30 +04:00
OutputAlignment ( m_ioWrapper . GetAlignmentInfoCollector ( ) , m_source - > GetTranslationId ( ) , bestHypo ) ;
2014-09-30 15:25:36 +04:00
IFVERBOSE ( 1 ) {
debug < < " BEST TRANSLATION: " < < * bestHypo < < endl ;
}
} else {
VERBOSE ( 1 , " NO BEST TRANSLATION " < < endl ) ;
}
out < < endl ;
}
// MBR decoding (n-best MBR, lattice MBR, consensus)
else {
// we first need the n-best translations
size_t nBestSize = staticData . GetMBRSize ( ) ;
if ( nBestSize < = 0 ) {
cerr < < " ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size) " < < endl ;
exit ( 1 ) ;
}
TrellisPathList nBestList ;
manager . CalcNBest ( nBestSize , nBestList , true ) ;
VERBOSE ( 2 , " size of n-best: " < < nBestList . GetSize ( ) < < " ( " < < nBestSize < < " ) " < < endl ) ;
IFVERBOSE ( 2 ) {
PrintUserTime ( " calculated n-best list for (L)MBR decoding " ) ;
}
// lattice MBR
if ( staticData . UseLatticeMBR ( ) ) {
2014-10-01 21:21:57 +04:00
if ( m_ioWrapper . GetNBestOutputCollector ( ) ) {
2014-09-30 15:25:36 +04:00
//lattice mbr nbest
vector < LatticeMBRSolution > solutions ;
size_t n = min ( nBestSize , staticData . GetNBestSize ( ) ) ;
getLatticeMBRNBest ( manager , nBestList , solutions , n ) ;
ostringstream out ;
2014-10-07 22:16:30 +04:00
OutputLatticeMBRNBest ( out , solutions , m_source - > GetTranslationId ( ) ) ;
m_ioWrapper . GetNBestOutputCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) ) ;
2014-09-30 15:25:36 +04:00
} else {
//Lattice MBR decoding
vector < Word > mbrBestHypo = doLatticeMBR ( manager , nBestList ) ;
2014-10-07 22:16:30 +04:00
OutputBestHypo ( mbrBestHypo , m_source - > GetTranslationId ( ) , staticData . GetReportSegmentation ( ) ,
2014-09-30 15:25:36 +04:00
staticData . GetReportAllFactors ( ) , out ) ;
IFVERBOSE ( 2 ) {
PrintUserTime ( " finished Lattice MBR decoding " ) ;
}
}
}
// consensus decoding
else if ( staticData . UseConsensusDecoding ( ) ) {
const TrellisPath & conBestHypo = doConsensusDecoding ( manager , nBestList ) ;
2014-10-07 22:16:30 +04:00
OutputBestHypo ( conBestHypo , m_source - > GetTranslationId ( ) ,
2014-09-30 15:25:36 +04:00
staticData . GetReportSegmentation ( ) ,
staticData . GetReportAllFactors ( ) , out ) ;
2014-10-07 22:16:30 +04:00
OutputAlignment ( m_ioWrapper . GetAlignmentInfoCollector ( ) , m_source - > GetTranslationId ( ) , conBestHypo ) ;
2014-09-30 15:25:36 +04:00
IFVERBOSE ( 2 ) {
PrintUserTime ( " finished Consensus decoding " ) ;
}
}
// n-best MBR decoding
else {
const TrellisPath & mbrBestHypo = doMBR ( nBestList ) ;
2014-10-07 22:16:30 +04:00
OutputBestHypo ( mbrBestHypo , m_source - > GetTranslationId ( ) ,
2014-09-30 15:25:36 +04:00
staticData . GetReportSegmentation ( ) ,
staticData . GetReportAllFactors ( ) , out ) ;
2014-10-07 22:16:30 +04:00
OutputAlignment ( m_ioWrapper . GetAlignmentInfoCollector ( ) , m_source - > GetTranslationId ( ) , mbrBestHypo ) ;
2014-09-30 15:25:36 +04:00
IFVERBOSE ( 2 ) {
PrintUserTime ( " finished MBR decoding " ) ;
}
}
}
// report best translation to output collector
2014-10-07 22:16:30 +04:00
m_ioWrapper . GetSingleBestOutputCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) , debug . str ( ) ) ;
2014-09-30 15:25:36 +04:00
decisionRuleTime . stop ( ) ;
2014-10-07 22:16:30 +04:00
VERBOSE ( 1 , " Line " < < m_source - > GetTranslationId ( ) < < " : Decision rule took " < < decisionRuleTime < < " seconds total " < < endl ) ;
2014-09-30 15:25:36 +04:00
}
additionalReportingTime . start ( ) ;
// output n-best list
2014-10-01 21:21:57 +04:00
if ( m_ioWrapper . GetNBestOutputCollector ( ) & & ! staticData . UseLatticeMBR ( ) ) {
2014-09-30 15:25:36 +04:00
TrellisPathList nBestList ;
ostringstream out ;
manager . CalcNBest ( staticData . GetNBestSize ( ) , nBestList , staticData . GetDistinctNBest ( ) ) ;
2014-10-07 22:16:30 +04:00
OutputNBest ( out , nBestList , staticData . GetOutputFactorOrder ( ) , m_source - > GetTranslationId ( ) ,
2014-09-30 15:25:36 +04:00
staticData . GetReportSegmentation ( ) ) ;
2014-10-07 22:16:30 +04:00
m_ioWrapper . GetNBestOutputCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) ) ;
2014-09-30 15:25:36 +04:00
}
//lattice samples
2014-10-03 17:57:27 +04:00
if ( m_ioWrapper . GetLatticeSamplesCollector ( ) ) {
2014-09-30 15:25:36 +04:00
TrellisPathList latticeSamples ;
ostringstream out ;
manager . CalcLatticeSamples ( staticData . GetLatticeSamplesSize ( ) , latticeSamples ) ;
2014-10-07 22:16:30 +04:00
OutputNBest ( out , latticeSamples , staticData . GetOutputFactorOrder ( ) , m_source - > GetTranslationId ( ) ,
2014-09-30 15:25:36 +04:00
staticData . GetReportSegmentation ( ) ) ;
2014-10-07 22:16:30 +04:00
m_ioWrapper . GetLatticeSamplesCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) ) ;
2014-09-30 15:25:36 +04:00
}
// detailed translation reporting
2014-10-02 17:18:12 +04:00
if ( m_ioWrapper . GetDetailedTranslationCollector ( ) ) {
2014-09-30 15:25:36 +04:00
ostringstream out ;
fix ( out , PRECISION ) ;
TranslationAnalysis : : PrintTranslationAnalysis ( out , manager . GetBestHypothesis ( ) ) ;
2014-10-07 22:16:30 +04:00
m_ioWrapper . GetDetailedTranslationCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) ) ;
2014-09-30 15:25:36 +04:00
}
//list of unknown words
2014-10-01 22:24:58 +04:00
if ( m_ioWrapper . GetUnknownsCollector ( ) ) {
2014-09-30 15:25:36 +04:00
const vector < const Phrase * > & unknowns = manager . getSntTranslationOptions ( ) - > GetUnknownSources ( ) ;
ostringstream out ;
for ( size_t i = 0 ; i < unknowns . size ( ) ; + + i ) {
out < < * ( unknowns [ i ] ) ;
}
out < < endl ;
2014-10-07 22:16:30 +04:00
m_ioWrapper . GetUnknownsCollector ( ) - > Write ( m_source - > GetTranslationId ( ) , out . str ( ) ) ;
2014-09-30 15:25:36 +04:00
}
// report additional statistics
manager . CalcDecoderStatistics ( ) ;
2014-10-07 22:16:30 +04:00
VERBOSE ( 1 , " Line " < < m_source - > GetTranslationId ( ) < < " : Additional reporting took " < < additionalReportingTime < < " seconds total " < < endl ) ;
VERBOSE ( 1 , " Line " < < m_source - > GetTranslationId ( ) < < " : Translation took " < < translationTime < < " seconds total " < < endl ) ;
2014-09-30 15:25:36 +04:00
IFVERBOSE ( 2 ) {
PrintUserTime ( " Sentence Decoding Time: " ) ;
}
}
}