2010-05-27 12:37:25 +04:00
// $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
2006-07-04 22:04:38 +04:00
/***********************************************************************
Moses - factored phrase - based language decoder
2010-05-27 12:37:25 +04:00
Copyright ( C ) 2009 University of Edinburgh
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2.1 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
2006-07-04 22:04:38 +04:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-05-27 12:37:25 +04:00
/**
* Moses main , for single - threaded and multi - threaded .
* */
2013-03-22 20:14:28 +04:00
# include <boost/algorithm/string/predicate.hpp>
2013-03-22 00:48:47 +04:00
# include <boost/filesystem.hpp>
2013-03-21 23:19:31 +04:00
# include <boost/iostreams/device/file.hpp>
# include <boost/iostreams/filter/bzip2.hpp>
# include <boost/iostreams/filter/gzip.hpp>
# include <boost/iostreams/filtering_stream.hpp>
2012-01-13 19:20:42 +04:00
# include <exception>
2010-05-27 12:37:25 +04:00
# include <fstream>
# include <sstream>
# include <vector>
2006-07-04 22:04:38 +04:00
2012-10-05 20:49:52 +04:00
# include "util/usage.hh"
2006-07-04 22:04:38 +04:00
# ifdef WIN32
// Include Visual Leak Detector
2011-07-24 03:52:34 +04:00
//#include <vld.h>
2006-07-04 22:04:38 +04:00
# endif
2012-11-13 00:21:32 +04:00
# include "TranslationAnalysis.h"
2010-05-27 12:37:25 +04:00
# include "IOWrapper.h"
2007-05-16 00:54:39 +04:00
# include "mbr.h"
2012-11-13 00:21:32 +04:00
# include "moses/Hypothesis.h"
# include "moses/Manager.h"
# include "moses/StaticData.h"
# include "moses/Util.h"
# include "moses/Timer.h"
# include "moses/ThreadPool.h"
# include "moses/OutputCollector.h"
2008-09-24 20:48:23 +04:00
# ifdef HAVE_PROTOBUF
# include "hypergraph.pb.h"
2006-07-04 22:04:38 +04:00
# endif
using namespace std ;
2008-10-09 03:51:26 +04:00
using namespace Moses ;
2012-07-02 20:05:11 +04:00
using namespace MosesCmd ;
2006-08-08 01:18:13 +04:00
2012-07-02 20:05:11 +04:00
namespace MosesCmd
{
2011-03-02 22:02:07 +03:00
// output floats with three significant digits
2010-11-29 19:44:28 +03:00
static const size_t PRECISION = 3 ;
2010-05-27 12:37:25 +04:00
/** Enforce rounding: switch the stream to fixed-point notation and make it
 *  print `size` digits after the decimal point. */
void fix(std::ostream& stream, size_t size)
{
  // The two settings are independent of each other, so their order is free.
  stream.precision(size);
  stream.setf(std::ios_base::fixed);
}
2011-03-02 22:02:07 +03:00
/** Translates a sentence.
* - calls the search ( Manager )
* - applies the decision rule
* - outputs best translation and additional reporting
2010-05-27 12:37:25 +04:00
* */
2011-02-24 15:39:29 +03:00
class TranslationTask : public Task
{
public :
TranslationTask ( size_t lineNumber ,
InputType * source , OutputCollector * outputCollector , OutputCollector * nbestCollector ,
2011-10-04 19:46:24 +04:00
OutputCollector * latticeSamplesCollector ,
2011-02-24 15:39:29 +03:00
OutputCollector * wordGraphCollector , OutputCollector * searchGraphCollector ,
OutputCollector * detailedTranslationCollector ,
2012-09-21 11:55:37 +04:00
OutputCollector * alignmentInfoCollector ,
2013-02-15 22:06:54 +04:00
OutputCollector * unknownsCollector ,
2013-03-04 21:07:37 +04:00
bool outputSearchGraphSLF ,
bool outputSearchGraphHypergraph ) :
2011-02-24 15:39:29 +03:00
m_source ( source ) , m_lineNumber ( lineNumber ) ,
m_outputCollector ( outputCollector ) , m_nbestCollector ( nbestCollector ) ,
2011-10-04 19:46:24 +04:00
m_latticeSamplesCollector ( latticeSamplesCollector ) ,
2011-02-24 15:39:29 +03:00
m_wordGraphCollector ( wordGraphCollector ) , m_searchGraphCollector ( searchGraphCollector ) ,
m_detailedTranslationCollector ( detailedTranslationCollector ) ,
2012-09-21 11:55:37 +04:00
m_alignmentInfoCollector ( alignmentInfoCollector ) ,
2013-02-15 22:06:54 +04:00
m_unknownsCollector ( unknownsCollector ) ,
2013-03-04 21:07:37 +04:00
m_outputSearchGraphSLF ( outputSearchGraphSLF ) ,
m_outputSearchGraphHypergraph ( outputSearchGraphHypergraph ) { }
2011-02-24 15:39:29 +03:00
2011-03-02 22:02:07 +03:00
/** Translate one sentence
* gets called by main function implemented at end of this source file */
2011-02-24 15:39:29 +03:00
void Run ( ) {
2011-03-02 22:02:07 +03:00
// report thread number
2012-09-22 02:34:48 +04:00
# if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
2011-02-24 15:39:29 +03:00
TRACE_ERR ( " Translating line " < < m_lineNumber < < " in thread id " < < pthread_self ( ) < < std : : endl ) ;
2010-05-27 12:37:25 +04:00
# endif
2011-03-02 22:02:07 +03:00
2012-08-10 23:32:00 +04:00
Timer translationTime ;
translationTime . start ( ) ;
2011-03-02 22:02:07 +03:00
// shorthand for "global data"
2011-02-24 15:39:29 +03:00
const StaticData & staticData = StaticData : : Instance ( ) ;
2011-03-02 22:02:07 +03:00
// input sentence
2011-11-21 15:14:05 +04:00
Sentence sentence ( ) ;
2011-03-02 22:02:07 +03:00
// execute the translation
// note: this executes the search, resulting in a search graph
// we still need to apply the decision rule (MAP, MBR, ...)
2013-05-11 17:13:26 +04:00
Manager manager ( m_lineNumber , * m_source , staticData . GetSearchAlgorithm ( ) ) ;
2011-02-24 15:39:29 +03:00
manager . ProcessSentence ( ) ;
2011-03-02 22:02:07 +03:00
// output word graph
2011-02-24 15:39:29 +03:00
if ( m_wordGraphCollector ) {
ostringstream out ;
fix ( out , PRECISION ) ;
manager . GetWordGraph ( m_lineNumber , out ) ;
m_wordGraphCollector - > Write ( m_lineNumber , out . str ( ) ) ;
}
2011-03-02 22:02:07 +03:00
// output search graph
2011-02-24 15:39:29 +03:00
if ( m_searchGraphCollector ) {
ostringstream out ;
fix ( out , PRECISION ) ;
manager . OutputSearchGraph ( m_lineNumber , out ) ;
m_searchGraphCollector - > Write ( m_lineNumber , out . str ( ) ) ;
2008-03-18 00:34:19 +03:00
2008-09-24 20:48:23 +04:00
# ifdef HAVE_PROTOBUF
2011-02-24 15:39:29 +03:00
if ( staticData . GetOutputSearchGraphPB ( ) ) {
ostringstream sfn ;
sfn < < staticData . GetParam ( " output-search-graph-pb " ) [ 0 ] < < ' / ' < < m_lineNumber < < " .pb " < < ends ;
string fn = sfn . str ( ) ;
VERBOSE ( 2 , " Writing search graph to " < < fn < < endl ) ;
fstream output ( fn . c_str ( ) , ios : : trunc | ios : : binary | ios : : out ) ;
manager . SerializeSearchGraphPB ( m_lineNumber , output ) ;
}
2008-09-24 20:48:23 +04:00
# endif
2011-03-02 22:02:07 +03:00
}
2010-05-27 12:37:25 +04:00
2013-02-15 22:06:54 +04:00
// Output search graph in HTK standard lattice format (SLF)
2013-03-04 21:07:37 +04:00
if ( m_outputSearchGraphSLF ) {
stringstream fileName ;
fileName < < staticData . GetParam ( " output-search-graph-slf " ) [ 0 ] < < " / " < < m_lineNumber < < " .slf " ;
std : : ofstream * file = new std : : ofstream ;
file - > open ( fileName . str ( ) . c_str ( ) ) ;
if ( file - > is_open ( ) & & file - > good ( ) ) {
2013-02-15 22:06:54 +04:00
ostringstream out ;
fix ( out , PRECISION ) ;
manager . OutputSearchGraphAsSLF ( m_lineNumber , out ) ;
2013-03-04 21:07:37 +04:00
* file < < out . str ( ) ;
file - > flush ( ) ;
2013-02-15 22:06:54 +04:00
} else {
TRACE_ERR ( " Cannot output HTK standard lattice for line " < < m_lineNumber < < " because the output file is not open or not ready for writing " < < std : : endl ) ;
}
}
2013-02-22 21:24:35 +04:00
// Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
2013-03-04 21:07:37 +04:00
if ( m_outputSearchGraphHypergraph ) {
2013-03-22 00:48:47 +04:00
vector < string > hypergraphParameters = staticData . GetParam ( " output-search-graph-hypergraph " ) ;
bool appendSuffix ;
if ( hypergraphParameters . size ( ) > 0 & & hypergraphParameters [ 0 ] = = " true " ) {
appendSuffix = true ;
} else {
appendSuffix = false ;
}
string compression ;
if ( hypergraphParameters . size ( ) > 1 ) {
compression = hypergraphParameters [ 1 ] ;
} else {
compression = " txt " ;
}
string hypergraphDir ;
if ( hypergraphParameters . size ( ) > 2 ) {
2013-03-23 19:58:07 +04:00
hypergraphDir = hypergraphParameters [ 2 ] ;
2013-03-22 00:48:47 +04:00
} else {
string nbestFile = staticData . GetNBestFilePath ( ) ;
2013-03-22 20:14:28 +04:00
if ( ! nbestFile . empty ( ) & & nbestFile ! = " - " & & ! boost : : starts_with ( nbestFile , " /dev/stdout " ) ) {
2013-03-22 00:48:47 +04:00
boost : : filesystem : : path nbestPath ( nbestFile ) ;
2013-04-29 20:24:44 +04:00
// In the Boost filesystem API version 2,
// which was the default prior to Boost 1.46,
// the filename() method returned a string.
//
// In the Boost filesystem API version 3,
// which is the default starting with Boost 1.46,
// the filename() method returns a path object.
//
// To get a string from the path object,
// the native() method must be called.
// hypergraphDir = nbestPath.parent_path().filename()
//#if BOOST_VERSION >= 104600
// .native()
//#endif
//;
// Hopefully the following compiles under all versions of Boost.
//
// If this line gives you compile errors,
// contact Lane Schwartz on the Moses mailing list
hypergraphDir = nbestPath . parent_path ( ) . string ( ) ;
2013-03-22 00:48:47 +04:00
} else {
stringstream hypergraphDirName ;
hypergraphDirName < < boost : : filesystem : : current_path ( ) < < " /hypergraph " ;
hypergraphDir = hypergraphDirName . str ( ) ;
}
}
if ( ! boost : : filesystem : : exists ( hypergraphDir ) ) {
boost : : filesystem : : create_directory ( hypergraphDir ) ;
2013-03-21 23:19:31 +04:00
}
2013-03-22 00:48:47 +04:00
if ( ! boost : : filesystem : : exists ( hypergraphDir ) ) {
TRACE_ERR ( " Cannot output hypergraphs to " < < hypergraphDir < < " because the directory does not exist " < < std : : endl ) ;
} else if ( ! boost : : filesystem : : is_directory ( hypergraphDir ) ) {
TRACE_ERR ( " Cannot output hypergraphs to " < < hypergraphDir < < " because that path exists, but is not a directory " < < std : : endl ) ;
2013-02-22 21:24:35 +04:00
} else {
2013-03-22 00:48:47 +04:00
stringstream fileName ;
fileName < < hypergraphDir < < " / " < < m_lineNumber ;
if ( appendSuffix ) {
fileName < < " . " < < compression ;
}
boost : : iostreams : : filtering_ostream * file = new boost : : iostreams : : filtering_ostream ;
if ( compression = = " gz " ) {
file - > push ( boost : : iostreams : : gzip_compressor ( ) ) ;
} else if ( compression = = " bz2 " ) {
file - > push ( boost : : iostreams : : bzip2_compressor ( ) ) ;
} else if ( compression ! = " txt " ) {
TRACE_ERR ( " Unrecognized hypergraph compression format ( " < < compression < < " ) - using uncompressed plain txt " < < std : : endl ) ;
compression = " txt " ;
}
file - > push ( boost : : iostreams : : file_sink ( fileName . str ( ) , ios_base : : out ) ) ;
if ( file - > is_complete ( ) & & file - > good ( ) ) {
fix ( * file , PRECISION ) ;
manager . OutputSearchGraphAsHypergraph ( m_lineNumber , * file ) ;
file - > flush ( ) ;
} else {
TRACE_ERR ( " Cannot output hypergraph for line " < < m_lineNumber < < " because the output file " < < fileName . str ( ) < < " is not open or not ready for writing " < < std : : endl ) ;
}
file - > pop ( ) ;
delete file ;
2013-02-22 21:24:35 +04:00
}
}
2011-03-02 22:02:07 +03:00
// apply decision rule and output best translation(s)
2011-02-24 15:39:29 +03:00
if ( m_outputCollector ) {
ostringstream out ;
ostringstream debug ;
fix ( debug , PRECISION ) ;
2011-03-02 22:02:07 +03:00
// all derivations - send them to debug stream
2011-02-24 15:39:29 +03:00
if ( staticData . PrintAllDerivations ( ) ) {
manager . PrintAllDerivations ( m_lineNumber , debug ) ;
}
2011-03-02 22:02:07 +03:00
// MAP decoding: best hypothesis
2011-02-24 15:39:29 +03:00
const Hypothesis * bestHypo = NULL ;
2011-03-02 22:02:07 +03:00
if ( ! staticData . UseMBR ( ) )
2013-03-13 16:12:33 +04:00
{
2011-02-24 15:39:29 +03:00
bestHypo = manager . GetBestHypothesis ( ) ;
if ( bestHypo ) {
if ( staticData . IsPathRecoveryEnabled ( ) ) {
OutputInput ( out , bestHypo ) ;
out < < " ||| " ;
}
2013-02-11 22:01:33 +04:00
if ( staticData . GetParam ( " print-id " ) . size ( ) & & Scan < bool > ( staticData . GetParam ( " print-id " ) [ 0 ] ) ) {
out < < m_source - > GetTranslationId ( ) < < " " ;
}
2012-04-19 01:09:02 +04:00
OutputBestSurface (
2011-02-24 15:39:29 +03:00
out ,
bestHypo ,
staticData . GetOutputFactorOrder ( ) ,
staticData . GetReportSegmentation ( ) ,
staticData . GetReportAllFactors ( ) ) ;
2013-03-13 16:12:33 +04:00
if ( staticData . PrintAlignmentInfo ( ) ) {
out < < " ||| " ;
OutputAlignment ( out , bestHypo ) ;
}
2011-02-24 15:39:29 +03:00
OutputAlignment ( m_alignmentInfoCollector , m_lineNumber , bestHypo ) ;
IFVERBOSE ( 1 ) {
debug < < " BEST TRANSLATION: " < < * bestHypo < < endl ;
}
2010-02-18 17:15:34 +03:00
}
2011-02-24 15:39:29 +03:00
out < < endl ;
2013-03-13 16:12:33 +04:00
}
2011-03-02 22:02:07 +03:00
// MBR decoding (n-best MBR, lattice MBR, consensus)
else
{
// we first need the n-best translations
2011-02-24 15:39:29 +03:00
size_t nBestSize = staticData . GetMBRSize ( ) ;
if ( nBestSize < = 0 ) {
cerr < < " ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size) " < < endl ;
exit ( 1 ) ;
}
TrellisPathList nBestList ;
manager . CalcNBest ( nBestSize , nBestList , true ) ;
VERBOSE ( 2 , " size of n-best: " < < nBestList . GetSize ( ) < < " ( " < < nBestSize < < " ) " < < endl ) ;
IFVERBOSE ( 2 ) {
PrintUserTime ( " calculated n-best list for (L)MBR decoding " ) ;
2010-02-18 17:15:34 +03:00
}
2010-05-27 12:37:25 +04:00
2011-03-02 22:02:07 +03:00
// lattice MBR
2011-02-24 15:39:29 +03:00
if ( staticData . UseLatticeMBR ( ) ) {
if ( m_nbestCollector ) {
//lattice mbr nbest
vector < LatticeMBRSolution > solutions ;
size_t n = min ( nBestSize , staticData . GetNBestSize ( ) ) ;
getLatticeMBRNBest ( manager , nBestList , solutions , n ) ;
ostringstream out ;
OutputLatticeMBRNBest ( out , solutions , m_lineNumber ) ;
m_nbestCollector - > Write ( m_lineNumber , out . str ( ) ) ;
} else {
//Lattice MBR decoding
vector < Word > mbrBestHypo = doLatticeMBR ( manager , nBestList ) ;
OutputBestHypo ( mbrBestHypo , m_lineNumber , staticData . GetReportSegmentation ( ) ,
staticData . GetReportAllFactors ( ) , out ) ;
IFVERBOSE ( 2 ) {
PrintUserTime ( " finished Lattice MBR decoding " ) ;
}
}
2011-03-02 22:02:07 +03:00
}
// consensus decoding
else if ( staticData . UseConsensusDecoding ( ) ) {
2011-02-24 15:39:29 +03:00
const TrellisPath & conBestHypo = doConsensusDecoding ( manager , nBestList ) ;
OutputBestHypo ( conBestHypo , m_lineNumber ,
staticData . GetReportSegmentation ( ) ,
staticData . GetReportAllFactors ( ) , out ) ;
OutputAlignment ( m_alignmentInfoCollector , m_lineNumber , conBestHypo ) ;
IFVERBOSE ( 2 ) {
PrintUserTime ( " finished Consensus decoding " ) ;
}
2011-03-02 22:02:07 +03:00
}
// n-best MBR decoding
else {
2011-02-24 15:39:29 +03:00
const Moses : : TrellisPath & mbrBestHypo = doMBR ( nBestList ) ;
OutputBestHypo ( mbrBestHypo , m_lineNumber ,
staticData . GetReportSegmentation ( ) ,
staticData . GetReportAllFactors ( ) , out ) ;
OutputAlignment ( m_alignmentInfoCollector , m_lineNumber , mbrBestHypo ) ;
IFVERBOSE ( 2 ) {
PrintUserTime ( " finished MBR decoding " ) ;
}
}
}
2011-03-02 22:02:07 +03:00
// report best translation to output collector
2011-02-24 15:39:29 +03:00
m_outputCollector - > Write ( m_lineNumber , out . str ( ) , debug . str ( ) ) ;
}
2011-03-02 22:02:07 +03:00
// output n-best list
2011-02-24 15:39:29 +03:00
if ( m_nbestCollector & & ! staticData . UseLatticeMBR ( ) ) {
TrellisPathList nBestList ;
ostringstream out ;
manager . CalcNBest ( staticData . GetNBestSize ( ) , nBestList , staticData . GetDistinctNBest ( ) ) ;
2013-05-11 17:13:26 +04:00
OutputNBest ( out , nBestList , staticData . GetOutputFactorOrder ( ) , m_lineNumber ,
2012-04-19 01:09:02 +04:00
staticData . GetReportSegmentation ( ) ) ;
2011-02-24 15:39:29 +03:00
m_nbestCollector - > Write ( m_lineNumber , out . str ( ) ) ;
}
2010-05-27 12:37:25 +04:00
2011-10-04 19:46:24 +04:00
//lattice samples
if ( m_latticeSamplesCollector ) {
TrellisPathList latticeSamples ;
ostringstream out ;
manager . CalcLatticeSamples ( staticData . GetLatticeSamplesSize ( ) , latticeSamples ) ;
2013-05-11 17:13:26 +04:00
OutputNBest ( out , latticeSamples , staticData . GetOutputFactorOrder ( ) , m_lineNumber ,
2012-04-19 01:09:02 +04:00
staticData . GetReportSegmentation ( ) ) ;
2011-10-04 19:46:24 +04:00
m_latticeSamplesCollector - > Write ( m_lineNumber , out . str ( ) ) ;
}
2011-03-02 22:02:07 +03:00
// detailed translation reporting
2011-02-24 15:39:29 +03:00
if ( m_detailedTranslationCollector ) {
ostringstream out ;
fix ( out , PRECISION ) ;
2013-05-11 17:13:26 +04:00
TranslationAnalysis : : PrintTranslationAnalysis ( out , manager . GetBestHypothesis ( ) ) ;
2011-02-24 15:39:29 +03:00
m_detailedTranslationCollector - > Write ( m_lineNumber , out . str ( ) ) ;
}
2012-09-21 11:55:37 +04:00
//list of unknown words
if ( m_unknownsCollector ) {
const vector < Phrase * > & unknowns = manager . getSntTranslationOptions ( ) - > GetUnknownSources ( ) ;
ostringstream out ;
for ( size_t i = 0 ; i < unknowns . size ( ) ; + + i ) {
out < < * ( unknowns [ i ] ) ;
}
out < < endl ;
m_unknownsCollector - > Write ( m_lineNumber , out . str ( ) ) ;
}
2011-03-02 22:02:07 +03:00
// report additional statistics
2011-02-24 15:39:29 +03:00
IFVERBOSE ( 2 ) {
PrintUserTime ( " Sentence Decoding Time: " ) ;
}
manager . CalcDecoderStatistics ( ) ;
2012-08-10 23:32:00 +04:00
VERBOSE ( 1 , " Line " < < m_lineNumber < < " : Translation took " < < translationTime < < " seconds total " < < endl ) ;
2011-02-24 15:39:29 +03:00
}
~ TranslationTask ( ) {
delete m_source ;
}
private :
InputType * m_source ;
size_t m_lineNumber ;
OutputCollector * m_outputCollector ;
OutputCollector * m_nbestCollector ;
2011-10-04 19:46:24 +04:00
OutputCollector * m_latticeSamplesCollector ;
2011-02-24 15:39:29 +03:00
OutputCollector * m_wordGraphCollector ;
OutputCollector * m_searchGraphCollector ;
OutputCollector * m_detailedTranslationCollector ;
OutputCollector * m_alignmentInfoCollector ;
2012-09-21 11:55:37 +04:00
OutputCollector * m_unknownsCollector ;
2013-03-04 21:07:37 +04:00
bool m_outputSearchGraphSLF ;
bool m_outputSearchGraphHypergraph ;
2011-02-24 15:39:29 +03:00
std : : ofstream * m_alignmentStream ;
2010-05-27 12:37:25 +04:00
} ;
2011-02-24 15:39:29 +03:00
static void PrintFeatureWeight ( const FeatureFunction * ff )
{
2011-01-05 16:49:44 +03:00
size_t numScoreComps = ff - > GetNumScoreComponents ( ) ;
2013-05-08 18:34:56 +04:00
if ( numScoreComps ! = 0 ) {
2011-01-05 16:49:44 +03:00
vector < float > values = StaticData : : Instance ( ) . GetAllWeights ( ) . GetScoresForProducer ( ff ) ;
2013-03-14 23:06:01 +04:00
cout < < ff - > GetScoreProducerDescription ( ) < < " = " ;
2012-12-17 21:17:44 +04:00
for ( size_t i = 0 ; i < numScoreComps ; + + i ) {
cout < < " " < < values [ i ] ;
}
cout < < endl ;
2010-11-29 19:44:28 +03:00
}
2012-01-18 16:26:51 +04:00
else {
if ( ff - > GetSparseProducerWeight ( ) = = 1 )
2013-03-14 23:06:01 +04:00
cout < < ff - > GetScoreProducerDescription ( ) < < " = sparse " < < endl ;
2012-01-18 16:26:51 +04:00
else
2013-03-14 23:06:01 +04:00
cout < < ff - > GetScoreProducerDescription ( ) < < " = " < < ff - > GetSparseProducerWeight ( ) < < endl ;
2012-01-20 19:35:55 +04:00
}
}
2010-11-29 19:44:28 +03:00
2011-02-24 15:39:29 +03:00
static void ShowWeights ( )
{
2012-11-09 16:11:49 +04:00
//TODO: Find a way of ensuring this order is synced with the nbest
2010-11-29 19:44:28 +03:00
fix ( cout , 6 ) ;
const StaticData & staticData = StaticData : : Instance ( ) ;
2012-12-31 04:57:21 +04:00
const vector < const StatelessFeatureFunction * > & slf = StatelessFeatureFunction : : GetStatelessFeatureFunctions ( ) ;
const vector < const StatefulFeatureFunction * > & sff = StatefulFeatureFunction : : GetStatefulFeatureFunctions ( ) ;
2012-12-06 21:13:00 +04:00
2010-11-29 19:44:28 +03:00
for ( size_t i = 0 ; i < sff . size ( ) ; + + i ) {
2012-12-15 23:20:07 +04:00
const StatefulFeatureFunction * ff = sff [ i ] ;
if ( ff - > IsTuneable ( ) ) {
PrintFeatureWeight ( ff ) ;
}
2010-11-29 19:44:28 +03:00
}
for ( size_t i = 0 ; i < slf . size ( ) ; + + i ) {
2012-12-15 23:20:07 +04:00
const StatelessFeatureFunction * ff = slf [ i ] ;
if ( ff - > IsTuneable ( ) ) {
PrintFeatureWeight ( ff ) ;
}
2012-11-09 16:11:49 +04:00
}
2010-11-29 19:44:28 +03:00
}
2013-02-23 01:20:03 +04:00
size_t OutputFeatureWeightsForHypergraph ( size_t index , const FeatureFunction * ff , std : : ostream & outputSearchGraphStream )
{
size_t numScoreComps = ff - > GetNumScoreComponents ( ) ;
2013-05-08 18:34:56 +04:00
if ( numScoreComps ! = 0 ) {
2013-02-23 01:20:03 +04:00
vector < float > values = StaticData : : Instance ( ) . GetAllWeights ( ) . GetScoresForProducer ( ff ) ;
if ( numScoreComps > 1 ) {
for ( size_t i = 0 ; i < numScoreComps ; + + i ) {
2013-02-24 04:31:29 +04:00
outputSearchGraphStream < < ff - > GetScoreProducerDescription ( )
2013-02-23 01:20:03 +04:00
< < i
< < " = " < < values [ i ] < < endl ;
}
} else {
2013-02-24 04:31:29 +04:00
outputSearchGraphStream < < ff - > GetScoreProducerDescription ( )
2013-02-23 01:20:03 +04:00
< < " = " < < values [ 0 ] < < endl ;
}
return index + numScoreComps ;
} else {
cerr < < " Sparse features are not yet supported when outputting hypergraph format " < < endl ;
assert ( false ) ;
return 0 ;
}
}
void OutputFeatureWeightsForHypergraph ( std : : ostream & outputSearchGraphStream )
{
outputSearchGraphStream . setf ( std : : ios : : fixed ) ;
outputSearchGraphStream . precision ( 6 ) ;
const StaticData & staticData = StaticData : : Instance ( ) ;
2013-02-24 04:31:29 +04:00
const vector < const StatelessFeatureFunction * > & slf = StatelessFeatureFunction : : GetStatelessFeatureFunctions ( ) ;
const vector < const StatefulFeatureFunction * > & sff = StatefulFeatureFunction : : GetStatefulFeatureFunctions ( ) ;
2013-02-23 01:20:03 +04:00
size_t featureIndex = 1 ;
for ( size_t i = 0 ; i < sff . size ( ) ; + + i ) {
featureIndex = OutputFeatureWeightsForHypergraph ( featureIndex , sff [ i ] , outputSearchGraphStream ) ;
}
for ( size_t i = 0 ; i < slf . size ( ) ; + + i ) {
2013-02-24 04:31:29 +04:00
/*
2013-02-23 01:20:03 +04:00
if ( slf [ i ] - > GetScoreProducerWeightShortName ( ) ! = " u " & &
slf [ i ] - > GetScoreProducerWeightShortName ( ) ! = " tm " & &
slf [ i ] - > GetScoreProducerWeightShortName ( ) ! = " I " & &
slf [ i ] - > GetScoreProducerWeightShortName ( ) ! = " g " )
2013-02-24 04:31:29 +04:00
*/
2013-02-23 01:20:03 +04:00
{
featureIndex = OutputFeatureWeightsForHypergraph ( featureIndex , slf [ i ] , outputSearchGraphStream ) ;
}
}
2013-02-24 04:31:29 +04:00
const vector < PhraseDictionary * > & pds = staticData . GetPhraseDictionaries ( ) ;
2013-02-23 01:20:03 +04:00
for ( size_t i = 0 ; i < pds . size ( ) ; i + + ) {
featureIndex = OutputFeatureWeightsForHypergraph ( featureIndex , pds [ i ] , outputSearchGraphStream ) ;
}
2013-02-24 04:31:29 +04:00
const vector < const GenerationDictionary * > & gds = staticData . GetGenerationDictionaries ( ) ;
2013-02-23 01:20:03 +04:00
for ( size_t i = 0 ; i < gds . size ( ) ; i + + ) {
featureIndex = OutputFeatureWeightsForHypergraph ( featureIndex , gds [ i ] , outputSearchGraphStream ) ;
}
}
2012-07-02 20:05:11 +04:00
} //namespace
2011-03-02 22:02:07 +03:00
/** main function of the command line version of the decoder **/
2011-02-24 15:39:29 +03:00
int main ( int argc , char * * argv )
{
2012-01-13 19:20:42 +04:00
try {
2010-05-27 12:37:25 +04:00
# ifdef HAVE_PROTOBUF
2012-01-13 19:20:42 +04:00
GOOGLE_PROTOBUF_VERIFY_VERSION ;
2010-05-27 12:37:25 +04:00
# endif
2011-03-02 22:02:07 +03:00
2012-01-13 19:20:42 +04:00
// echo command line, if verbose
IFVERBOSE ( 1 ) {
TRACE_ERR ( " command: " ) ;
for ( int i = 0 ; i < argc ; + + i ) TRACE_ERR ( argv [ i ] < < " " ) ;
TRACE_ERR ( endl ) ;
2010-05-27 12:37:25 +04:00
}
2012-01-13 19:20:42 +04:00
// set number of significant decimals in output
fix ( cout , PRECISION ) ;
fix ( cerr , PRECISION ) ;
2008-10-30 22:44:54 +03:00
2012-01-13 19:20:42 +04:00
// load all the settings into the Parameter class
// (stores them as strings, or array of strings)
2013-03-15 16:30:39 +04:00
Parameter params ;
if ( ! params . LoadParam ( argc , argv ) ) {
2012-01-13 19:20:42 +04:00
exit ( 1 ) ;
}
2011-02-24 15:39:29 +03:00
2011-08-30 16:25:50 +04:00
2012-01-13 19:20:42 +04:00
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
2013-03-15 16:30:39 +04:00
if ( ! StaticData : : LoadDataStatic ( & params , argv [ 0 ] ) ) {
2012-01-13 19:20:42 +04:00
exit ( 1 ) ;
}
2007-01-22 19:21:28 +03:00
2012-01-13 19:20:42 +04:00
// setting "-show-weights" -> just dump out weights and exit
2013-03-15 16:30:39 +04:00
if ( params . isParamSpecified ( " show-weights " ) ) {
2012-01-13 19:20:42 +04:00
ShowWeights ( ) ;
exit ( 0 ) ;
}
2011-08-18 01:13:21 +04:00
2012-01-13 19:20:42 +04:00
// shorthand for accessing information in StaticData
const StaticData & staticData = StaticData : : Instance ( ) ;
2011-03-02 22:02:07 +03:00
2011-09-23 02:29:56 +04:00
2012-01-13 19:20:42 +04:00
//initialise random numbers
srand ( time ( NULL ) ) ;
2011-10-04 19:46:24 +04:00
2012-01-13 19:20:42 +04:00
// set up read/writing class
2012-06-29 07:19:28 +04:00
IOWrapper * ioWrapper = GetIOWrapper ( staticData ) ;
2012-01-13 19:20:42 +04:00
if ( ! ioWrapper ) {
cerr < < " Error; Failed to create IO object " < < endl ;
exit ( 1 ) ;
}
2011-02-24 15:39:29 +03:00
2012-01-13 19:20:42 +04:00
// check on weights
2012-05-25 00:11:35 +04:00
const ScoreComponentCollection & weights = staticData . GetAllWeights ( ) ;
2012-01-13 19:20:42 +04:00
IFVERBOSE ( 2 ) {
2012-05-25 00:11:35 +04:00
TRACE_ERR ( " The global weight vector looks like this: " ) ;
TRACE_ERR ( weights ) ;
2012-01-13 19:20:42 +04:00
TRACE_ERR ( " \n " ) ;
2010-05-27 12:37:25 +04:00
}
2013-03-22 20:14:28 +04:00
if ( staticData . GetOutputSearchGraphHypergraph ( ) ) {
2013-03-22 00:48:47 +04:00
ofstream * weightsOut = new std : : ofstream ;
2013-03-22 20:14:28 +04:00
stringstream weightsFilename ;
if ( staticData . GetParam ( " output-search-graph-hypergraph " ) . size ( ) > 3 ) {
weightsFilename < < staticData . GetParam ( " output-search-graph-hypergraph " ) [ 3 ] ;
} else {
string nbestFile = staticData . GetNBestFilePath ( ) ;
if ( ! nbestFile . empty ( ) & & nbestFile ! = " - " & & ! boost : : starts_with ( nbestFile , " /dev/stdout " ) ) {
boost : : filesystem : : path nbestPath ( nbestFile ) ;
weightsFilename < < nbestPath . parent_path ( ) . filename ( ) < < " /weights " ;
} else {
weightsFilename < < boost : : filesystem : : current_path ( ) < < " /hypergraph/weights " ;
}
}
boost : : filesystem : : path weightsFilePath ( weightsFilename . str ( ) ) ;
if ( ! boost : : filesystem : : exists ( weightsFilePath . parent_path ( ) ) ) {
boost : : filesystem : : create_directory ( weightsFilePath . parent_path ( ) ) ;
}
TRACE_ERR ( " The weights file is " < < weightsFilename . str ( ) < < " \n " ) ;
weightsOut - > open ( weightsFilename . str ( ) . c_str ( ) ) ;
2013-02-23 01:20:03 +04:00
OutputFeatureWeightsForHypergraph ( * weightsOut ) ;
weightsOut - > flush ( ) ;
weightsOut - > close ( ) ;
delete weightsOut ;
}
2011-02-24 15:39:29 +03:00
2012-01-13 19:20:42 +04:00
// initialize output streams
// note: we can't just write to STDOUT or files
// because multithreading may return sentences in shuffled order
auto_ptr < OutputCollector > outputCollector ; // for translations
auto_ptr < OutputCollector > nbestCollector ; // for n-best lists
auto_ptr < OutputCollector > latticeSamplesCollector ; //for lattice samples
auto_ptr < ofstream > nbestOut ;
auto_ptr < ofstream > latticeSamplesOut ;
size_t nbestSize = staticData . GetNBestSize ( ) ;
string nbestFile = staticData . GetNBestFilePath ( ) ;
bool output1best = true ;
if ( nbestSize ) {
if ( nbestFile = = " - " | | nbestFile = = " /dev/stdout " ) {
// nbest to stdout, no 1-best
nbestCollector . reset ( new OutputCollector ( ) ) ;
output1best = false ;
} else {
// nbest to file, 1-best to stdout
nbestOut . reset ( new ofstream ( nbestFile . c_str ( ) ) ) ;
if ( ! nbestOut - > good ( ) ) {
TRACE_ERR ( " ERROR: Failed to open " < < nbestFile < < " for nbest lists " < < endl ) ;
exit ( 1 ) ;
}
nbestCollector . reset ( new OutputCollector ( nbestOut . get ( ) ) ) ;
2011-10-04 19:46:24 +04:00
}
2010-05-27 12:37:25 +04:00
}
2012-01-13 19:20:42 +04:00
size_t latticeSamplesSize = staticData . GetLatticeSamplesSize ( ) ;
string latticeSamplesFile = staticData . GetLatticeSamplesFilePath ( ) ;
if ( latticeSamplesSize ) {
if ( latticeSamplesFile = = " - " | | latticeSamplesFile = = " /dev/stdout " ) {
latticeSamplesCollector . reset ( new OutputCollector ( ) ) ;
output1best = false ;
} else {
latticeSamplesOut . reset ( new ofstream ( latticeSamplesFile . c_str ( ) ) ) ;
if ( ! latticeSamplesOut - > good ( ) ) {
TRACE_ERR ( " ERROR: Failed to open " < < latticeSamplesFile < < " for lattice samples " < < endl ) ;
exit ( 1 ) ;
}
latticeSamplesCollector . reset ( new OutputCollector ( latticeSamplesOut . get ( ) ) ) ;
2011-10-04 19:46:24 +04:00
}
}
2012-01-13 19:20:42 +04:00
if ( output1best ) {
outputCollector . reset ( new OutputCollector ( ) ) ;
}
// initialize stream for word graph (aka: output lattice)
auto_ptr < OutputCollector > wordGraphCollector ;
if ( staticData . GetOutputWordGraph ( ) ) {
wordGraphCollector . reset ( new OutputCollector ( & ( ioWrapper - > GetOutputWordGraphStream ( ) ) ) ) ;
}
// initialize stream for search graph
// note: this is essentially the same as above, but in a different format
auto_ptr < OutputCollector > searchGraphCollector ;
if ( staticData . GetOutputSearchGraph ( ) ) {
searchGraphCollector . reset ( new OutputCollector ( & ( ioWrapper - > GetOutputSearchGraphStream ( ) ) ) ) ;
}
// initialize stram for details about the decoder run
auto_ptr < OutputCollector > detailedTranslationCollector ;
if ( staticData . IsDetailedTranslationReportingEnabled ( ) ) {
detailedTranslationCollector . reset ( new OutputCollector ( & ( ioWrapper - > GetDetailedTranslationReportingStream ( ) ) ) ) ;
}
// initialize stram for word alignment between input and output
auto_ptr < OutputCollector > alignmentInfoCollector ;
if ( ! staticData . GetAlignmentOutputFile ( ) . empty ( ) ) {
alignmentInfoCollector . reset ( new OutputCollector ( ioWrapper - > GetAlignmentOutputStream ( ) ) ) ;
}
2012-09-21 11:55:37 +04:00
//initialise stream for unknown (oov) words
auto_ptr < OutputCollector > unknownsCollector ;
auto_ptr < ofstream > unknownsStream ;
if ( ! staticData . GetOutputUnknownsFile ( ) . empty ( ) ) {
unknownsStream . reset ( new ofstream ( staticData . GetOutputUnknownsFile ( ) . c_str ( ) ) ) ;
if ( ! unknownsStream - > good ( ) ) {
TRACE_ERR ( " Unable to open " < < staticData . GetOutputUnknownsFile ( ) < < " for unknowns " ) ;
exit ( 1 ) ;
}
unknownsCollector . reset ( new OutputCollector ( unknownsStream . get ( ) ) ) ;
}
2012-01-13 19:20:42 +04:00
2011-09-23 02:29:56 +04:00
# ifdef WITH_THREADS
2012-01-13 19:20:42 +04:00
ThreadPool pool ( staticData . ThreadCount ( ) ) ;
2011-09-23 02:29:56 +04:00
# endif
2012-01-13 19:20:42 +04:00
// main loop over set of input sentences
InputType * source = NULL ;
2013-02-14 00:52:40 +04:00
size_t lineCount = staticData . GetStartTranslationId ( ) ;
2012-01-13 19:20:42 +04:00
while ( ReadInput ( * ioWrapper , staticData . GetInputType ( ) , source ) ) {
IFVERBOSE ( 1 ) {
ResetUserTime ( ) ;
}
// set up task of translating one sentence
TranslationTask * task =
new TranslationTask ( lineCount , source , outputCollector . get ( ) ,
nbestCollector . get ( ) ,
latticeSamplesCollector . get ( ) ,
wordGraphCollector . get ( ) ,
searchGraphCollector . get ( ) ,
detailedTranslationCollector . get ( ) ,
2012-09-21 11:55:37 +04:00
alignmentInfoCollector . get ( ) ,
2013-02-15 22:06:54 +04:00
unknownsCollector . get ( ) ,
2013-03-04 21:07:37 +04:00
staticData . GetOutputSearchGraphSLF ( ) ,
staticData . GetOutputSearchGraphHypergraph ( ) ) ;
2012-01-13 19:20:42 +04:00
// execute task
2010-05-27 12:37:25 +04:00
# ifdef WITH_THREADS
2012-01-13 19:20:42 +04:00
pool . Submit ( task ) ;
2010-05-27 12:37:25 +04:00
# else
2012-01-13 19:20:42 +04:00
task - > Run ( ) ;
2012-03-30 23:25:42 +04:00
delete task ;
2010-05-27 12:37:25 +04:00
# endif
2012-01-13 19:20:42 +04:00
source = NULL ; //make sure it doesn't get deleted
+ + lineCount ;
}
2011-03-02 22:02:07 +03:00
// we are done, finishing up
2010-05-27 12:37:25 +04:00
# ifdef WITH_THREADS
2012-01-13 19:20:42 +04:00
pool . Stop ( true ) ; //flush remaining jobs
2010-05-27 12:37:25 +04:00
# endif
2013-03-15 20:11:15 +04:00
delete ioWrapper ;
2012-01-13 19:20:42 +04:00
} catch ( const std : : exception & e ) {
std : : cerr < < " Exception: " < < e . what ( ) < < std : : endl ;
return EXIT_FAILURE ;
}
2012-10-05 20:49:52 +04:00
IFVERBOSE ( 1 ) util : : PrintUsage ( std : : cerr ) ;
2010-05-27 12:37:25 +04:00
# ifndef EXIT_RETURN
2011-02-24 15:39:29 +03:00
//This avoids that destructors are called (it can take a long time)
exit ( EXIT_SUCCESS ) ;
2010-05-27 12:37:25 +04:00
# else
2011-02-24 15:39:29 +03:00
return EXIT_SUCCESS ;
2010-05-27 12:37:25 +04:00
# endif
2006-07-04 22:04:38 +04:00
}