2008-06-11 14:52:57 +04:00
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <string>
2011-11-18 16:07:41 +04:00
# include "util/check.hh"
2008-06-11 14:52:57 +04:00
# include "PhraseDictionaryMemory.h"
# include "DecodeStepTranslation.h"
# include "DecodeStepGeneration.h"
# include "GenerationDictionary.h"
# include "DummyScoreProducers.h"
# include "StaticData.h"
# include "Util.h"
# include "FactorCollection.h"
# include "Timer.h"
2011-10-13 18:27:01 +04:00
# include "LM/Factory.h"
2008-06-11 14:52:57 +04:00
# include "LexicalReordering.h"
2009-05-26 23:30:35 +04:00
# include "GlobalLexicalModel.h"
2012-01-31 14:31:39 +04:00
# include "GlobalLexicalModelUnlimited.h"
2008-06-11 14:52:57 +04:00
# include "SentenceStats.h"
2011-05-11 02:02:25 +04:00
# include "PhraseBoundaryFeature.h"
2010-04-26 18:56:06 +04:00
# include "PhraseDictionary.h"
2011-09-20 19:32:26 +04:00
# include "SparsePhraseDictionaryFeature.h"
2011-03-22 17:33:16 +03:00
# include "PhrasePairFeature.h"
2011-08-06 18:10:43 +04:00
# include "PhraseLengthFeature.h"
2011-08-13 04:25:23 +04:00
# include "TargetWordInsertionFeature.h"
2011-08-13 05:39:35 +04:00
# include "SourceWordDeletionFeature.h"
2011-08-13 06:40:54 +04:00
# include "WordTranslationFeature.h"
2008-06-11 14:52:57 +04:00
# include "UserMessage.h"
# include "TranslationOption.h"
2010-09-17 17:36:03 +04:00
# include "TargetBigramFeature.h"
2011-11-04 20:40:12 +04:00
# include "TargetNgramFeature.h"
2008-06-11 14:52:57 +04:00
# include "DecodeGraph.h"
2008-06-19 03:14:09 +04:00
# include "InputFileStream.h"
2010-09-16 19:45:56 +04:00
# include "BleuScoreFeature.h"
2010-09-17 18:25:08 +04:00
# include "ScoreComponentCollection.h"
2008-06-11 14:52:57 +04:00
2011-05-13 23:28:23 +04:00
# ifdef HAVE_SYNLM
# include "SyntacticLanguageModel.h"
# endif
2011-09-23 02:29:56 +04:00
# ifdef WITH_THREADS
# include <boost/thread.hpp>
# endif
2008-06-11 14:52:57 +04:00
using namespace std ;
2008-10-09 03:51:26 +04:00
namespace Moses
{
2011-02-24 16:14:42 +03:00
/** Return the maximum of x and all elements of y.
 *  @param x  floor value, returned unchanged when y is empty
 *  @param y  candidate values (may be empty)
 *  @return   max(x, max(y)) — never less than x
 */
static size_t CalcMax(size_t x, const std::vector<size_t>& y)
{
  // std::max_element returns end() on an empty range, so guard for that case.
  std::vector<size_t>::const_iterator biggest = std::max_element(y.begin(), y.end());
  return (biggest == y.end()) ? x : std::max(x, *biggest);
}
2011-02-24 16:14:42 +03:00
/** Return the maximum of x and all elements of y and z.
 *  @param x  floor value, returned unchanged when both vectors are empty
 *  @param y  first set of candidate values (may be empty)
 *  @param z  second set of candidate values (may be empty)
 *  @return   max(x, max(y), max(z)) — never less than x
 */
static size_t CalcMax(size_t x, const std::vector<size_t>& y, const std::vector<size_t>& z)
{
  size_t best = x;
  // Empty-range guards: dereferencing max_element(end()) would be UB.
  if (!y.empty())
    best = std::max(best, *std::max_element(y.begin(), y.end()));
  if (!z.empty())
    best = std::max(best, *std::max_element(z.begin(), z.end()));
  return best;
}
// The single process-wide StaticData instance.
StaticData StaticData::s_instance;

/** Default constructor.
 *  NULL-initialises the optional sparse feature functions (they are only
 *  created later, in LoadData(), when the configuration asks for them) and
 *  sets defaults for the scalar options.  Real configuration happens in
 *  LoadData(); this only establishes a safe baseline state.
 */
StaticData::StaticData()
  : m_targetBigramFeature(NULL)
  , m_phraseBoundaryFeature(NULL)
  , m_phrasePairFeature(NULL)
  , m_phraseLengthFeature(NULL)
  , m_targetWordInsertionFeature(NULL)
  , m_sourceWordDeletionFeature(NULL)
  , m_wordTranslationFeature(NULL)
  , m_numLinkParams(1)
  , m_fLMsLoaded(false)
  , m_sourceStartPosMattersForRecombination(false)
  , m_inputType(SentenceInput)  // plain text unless -inputtype overrides
  , m_numInputScores(0)
  , m_bleuScoreFeature(NULL)
  , m_detailedTranslationReportingFilePath()
  , m_onlyDistinctNBest(false)
  , m_factorDelimiter("|") // default delimiter between factors
  , m_lmEnableOOVFeature(false)
  , m_isAlwaysCreateDirectTranslationOption(false)
{
  m_maxFactorIdx[0] = 0;  // source side
  m_maxFactorIdx[1] = 0;  // target side

  // Default XML markup delimiters; may be overridden via -xml-brackets
  // (see LoadData()).
  m_xmlBrackets.first = "<";
  m_xmlBrackets.second = ">";

  // memory pools
  Phrase::InitializeMemPool();
}
2011-02-24 16:14:42 +03:00
2012-04-29 08:37:48 +04:00
void StaticData : : ClearData ( ) {
for ( size_t i = 0 ; i < m_decodeGraphs . size ( ) ; + + i )
delete m_decodeGraphs [ i ] ;
m_decodeGraphs . clear ( ) ;
m_decodeGraphBackoff . clear ( ) ;
m_translationSystems . clear ( ) ;
for ( size_t i = 0 ; i < m_wordPenaltyProducers . size ( ) ; + + i ) {
ScoreComponentCollection : : UnregisterScoreProducer ( m_wordPenaltyProducers [ i ] ) ;
delete m_wordPenaltyProducers [ i ] ;
}
m_wordPenaltyProducers . clear ( ) ;
for ( size_t i = 0 ; i < m_distortionScoreProducers . size ( ) ; + + i ) {
ScoreComponentCollection : : UnregisterScoreProducer ( m_distortionScoreProducers [ i ] ) ;
delete m_distortionScoreProducers [ i ] ;
}
m_distortionScoreProducers . clear ( ) ;
for ( size_t i = 0 ; i < m_phraseDictionary . size ( ) ; + + i ) {
ScoreComponentCollection : : UnregisterScoreProducer ( m_phraseDictionary [ i ] ) ;
delete m_phraseDictionary [ i ] ;
}
m_phraseDictionary . clear ( ) ;
for ( size_t i = 0 ; i < m_reorderModels . size ( ) ; + + i ) {
ScoreComponentCollection : : UnregisterScoreProducer ( m_reorderModels [ i ] ) ;
delete m_reorderModels [ i ] ;
}
m_reorderModels . clear ( ) ;
for ( LMList : : const_iterator k = m_languageModel . begin ( ) ; k ! = m_languageModel . end ( ) ; + + k ) {
ScoreComponentCollection : : UnregisterScoreProducer ( * k ) ;
// delete *k;
}
m_languageModel . CleanUp ( ) ;
ScoreComponentCollection : : UnregisterScoreProducer ( m_bleuScoreFeature ) ;
ScoreComponentCollection : : UnregisterScoreProducer ( m_unknownWordPenaltyProducer ) ;
m_inputFactorOrder . clear ( ) ;
m_outputFactorOrder . clear ( ) ;
ScoreComponentCollection : : ResetCounter ( ) ;
ScoreProducer : : ResetDescriptionCounts ( ) ;
}
2008-06-11 14:52:57 +04:00
bool StaticData : : LoadData ( Parameter * parameter )
{
2011-02-24 16:14:42 +03:00
ResetUserTime ( ) ;
m_parameter = parameter ;
// verbose level
m_verboseLevel = 1 ;
if ( m_parameter - > GetParam ( " verbose " ) . size ( ) = = 1 ) {
m_verboseLevel = Scan < size_t > ( m_parameter - > GetParam ( " verbose " ) [ 0 ] ) ;
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
// to cube or not to cube
m_searchAlgorithm = ( m_parameter - > GetParam ( " search-algorithm " ) . size ( ) > 0 ) ?
( SearchAlgorithm ) Scan < size_t > ( m_parameter - > GetParam ( " search-algorithm " ) [ 0 ] ) : Normal ;
if ( m_searchAlgorithm = = ChartDecoding )
LoadChartDecodingParameters ( ) ;
else
LoadPhraseBasedParameters ( ) ;
// input type has to be specified BEFORE loading the phrase tables!
if ( m_parameter - > GetParam ( " inputtype " ) . size ( ) )
m_inputType = ( InputTypeEnum ) Scan < int > ( m_parameter - > GetParam ( " inputtype " ) [ 0 ] ) ;
std : : string s_it = " text input " ;
if ( m_inputType = = 1 ) {
s_it = " confusion net " ;
}
if ( m_inputType = = 2 ) {
s_it = " word lattice " ;
}
VERBOSE ( 2 , " input type is: " < < s_it < < " \n " ) ;
if ( m_parameter - > GetParam ( " recover-input-path " ) . size ( ) ) {
m_recoverPath = Scan < bool > ( m_parameter - > GetParam ( " recover-input-path " ) [ 0 ] ) ;
if ( m_recoverPath & & m_inputType = = SentenceInput ) {
TRACE_ERR ( " --recover-input-path should only be used with confusion net or word lattice input! \n " ) ;
m_recoverPath = false ;
2011-02-03 12:08:42 +03:00
}
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
2011-08-26 06:37:52 +04:00
if ( m_parameter - > GetParam ( " sort-word-alignment " ) . size ( ) ) {
m_wordAlignmentSort = ( WordAlignmentSort ) Scan < size_t > ( m_parameter - > GetParam ( " sort-word-alignment " ) [ 0 ] ) ;
}
2011-02-24 16:14:42 +03:00
// factor delimiter
if ( m_parameter - > GetParam ( " factor-delimiter " ) . size ( ) > 0 ) {
m_factorDelimiter = m_parameter - > GetParam ( " factor-delimiter " ) [ 0 ] ;
}
SetBooleanParameter ( & m_continuePartialTranslation , " continue-partial-translation " , false ) ;
//word-to-word alignment
SetBooleanParameter ( & m_UseAlignmentInfo , " use-alignment-info " , false ) ;
SetBooleanParameter ( & m_PrintAlignmentInfo , " print-alignment-info " , false ) ;
SetBooleanParameter ( & m_PrintAlignmentInfoNbest , " print-alignment-info-in-n-best " , false ) ;
SetBooleanParameter ( & m_outputHypoScore , " output-hypo-score " , false ) ;
if ( ! m_UseAlignmentInfo & & m_PrintAlignmentInfo ) {
TRACE_ERR ( " --print-alignment-info should only be used together with \" --use-alignment-info true \" . Continue forcing to false. \n " ) ;
m_PrintAlignmentInfo = false ;
}
if ( ! m_UseAlignmentInfo & & m_PrintAlignmentInfoNbest ) {
TRACE_ERR ( " --print-alignment-info-in-n-best should only be used together with \" --use-alignment-info true \" . Continue forcing to false. \n " ) ;
m_PrintAlignmentInfoNbest = false ;
}
if ( m_parameter - > GetParam ( " alignment-output-file " ) . size ( ) > 0 ) {
m_alignmentOutputFile = Scan < std : : string > ( m_parameter - > GetParam ( " alignment-output-file " ) [ 0 ] ) ;
}
// n-best
if ( m_parameter - > GetParam ( " n-best-list " ) . size ( ) > = 2 ) {
m_nBestFilePath = m_parameter - > GetParam ( " n-best-list " ) [ 0 ] ;
m_nBestSize = Scan < size_t > ( m_parameter - > GetParam ( " n-best-list " ) [ 1 ] ) ;
m_onlyDistinctNBest = ( m_parameter - > GetParam ( " n-best-list " ) . size ( ) > 2 & & m_parameter - > GetParam ( " n-best-list " ) [ 2 ] = = " distinct " ) ;
} else if ( m_parameter - > GetParam ( " n-best-list " ) . size ( ) = = 1 ) {
2011-10-04 19:46:24 +04:00
UserMessage : : Add ( string ( " wrong format for switch -n-best-list file size " ) ) ;
2011-02-24 16:14:42 +03:00
return false ;
} else {
m_nBestSize = 0 ;
}
if ( m_parameter - > GetParam ( " n-best-factor " ) . size ( ) > 0 ) {
m_nBestFactor = Scan < size_t > ( m_parameter - > GetParam ( " n-best-factor " ) [ 0 ] ) ;
} else {
m_nBestFactor = 20 ;
2008-06-11 14:52:57 +04:00
}
2012-05-28 10:03:45 +04:00
// explicit setting of distinct nbest
SetBooleanParameter ( & m_onlyDistinctNBest , " distinct-nbest " , false ) ;
2011-02-24 16:14:42 +03:00
2011-10-04 19:46:24 +04:00
//lattice samples
if ( m_parameter - > GetParam ( " lattice-samples " ) . size ( ) = = 2 ) {
m_latticeSamplesFilePath = m_parameter - > GetParam ( " lattice-samples " ) [ 0 ] ;
m_latticeSamplesSize = Scan < size_t > ( m_parameter - > GetParam ( " lattice-samples " ) [ 1 ] ) ;
} else if ( m_parameter - > GetParam ( " lattice-samples " ) . size ( ) ! = 0 ) {
UserMessage : : Add ( string ( " wrong format for switch -lattice-samples file size " ) ) ;
return false ;
} else {
m_latticeSamplesSize = 0 ;
}
2011-02-24 16:14:42 +03:00
// word graph
if ( m_parameter - > GetParam ( " output-word-graph " ) . size ( ) = = 2 )
m_outputWordGraph = true ;
else
m_outputWordGraph = false ;
// search graph
if ( m_parameter - > GetParam ( " output-search-graph " ) . size ( ) > 0 ) {
if ( m_parameter - > GetParam ( " output-search-graph " ) . size ( ) ! = 1 ) {
UserMessage : : Add ( string ( " ERROR: wrong format for switch -output-search-graph file " ) ) ;
return false ;
2011-08-18 01:13:21 +04:00
}
2011-02-24 16:14:42 +03:00
m_outputSearchGraph = true ;
}
// ... in extended format
else if ( m_parameter - > GetParam ( " output-search-graph-extended " ) . size ( ) > 0 ) {
if ( m_parameter - > GetParam ( " output-search-graph-extended " ) . size ( ) ! = 1 ) {
UserMessage : : Add ( string ( " ERROR: wrong format for switch -output-search-graph-extended file " ) ) ;
return false ;
}
m_outputSearchGraph = true ;
m_outputSearchGraphExtended = true ;
} else
m_outputSearchGraph = false ;
2008-09-24 20:48:23 +04:00
# ifdef HAVE_PROTOBUF
2011-02-24 16:14:42 +03:00
if ( m_parameter - > GetParam ( " output-search-graph-pb " ) . size ( ) > 0 ) {
if ( m_parameter - > GetParam ( " output-search-graph-pb " ) . size ( ) ! = 1 ) {
UserMessage : : Add ( string ( " ERROR: wrong format for switch -output-search-graph-pb path " ) ) ;
return false ;
}
m_outputSearchGraphPB = true ;
} else
m_outputSearchGraphPB = false ;
2008-09-24 20:48:23 +04:00
# endif
2011-09-16 15:58:53 +04:00
SetBooleanParameter ( & m_unprunedSearchGraph , " unpruned-search-graph " , true ) ;
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
// include feature names in the n-best list
SetBooleanParameter ( & m_labeledNBestList , " labeled-n-best-list " , true ) ;
// include word alignment in the n-best list
SetBooleanParameter ( & m_nBestIncludesAlignment , " include-alignment-in-n-best " , false ) ;
// printing source phrase spans
SetBooleanParameter ( & m_reportSegmentation , " report-segmentation " , false ) ;
// print all factors of output translations
SetBooleanParameter ( & m_reportAllFactors , " report-all-factors " , false ) ;
// print all factors of output translations
SetBooleanParameter ( & m_reportAllFactorsNBest , " report-all-factors-in-n-best " , false ) ;
2011-08-19 20:09:36 +04:00
// caching of translation options
2011-02-24 16:14:42 +03:00
if ( m_inputType = = SentenceInput ) {
SetBooleanParameter ( & m_useTransOptCache , " use-persistent-cache " , true ) ;
m_transOptCacheMaxSize = ( m_parameter - > GetParam ( " persistent-cache-size " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " persistent-cache-size " ) [ 0 ] ) : DEFAULT_MAX_TRANS_OPT_CACHE_SIZE ;
} else {
m_useTransOptCache = false ;
}
2011-08-19 20:09:36 +04:00
SetBooleanParameter ( & m_enableOnlineCommand , " enable-online-command " , false ) ;
if ( m_enableOnlineCommand = = true ) {
VERBOSE ( 1 , " Online commands are enabled. \n " ) ;
VERBOSE ( 1 , " Cache for translation options is disabled. \n " ) ;
m_useTransOptCache = false ;
}
2012-04-29 08:37:48 +04:00
std : : cerr < < " transOptCache: " < < m_useTransOptCache < < std : : endl ;
std : : cerr < < " transOptCache max size: " < < m_transOptCacheMaxSize < < std : : endl ;
2011-02-24 16:14:42 +03:00
//input factors
const vector < string > & inputFactorVector = m_parameter - > GetParam ( " input-factors " ) ;
for ( size_t i = 0 ; i < inputFactorVector . size ( ) ; i + + ) {
m_inputFactorOrder . push_back ( Scan < FactorType > ( inputFactorVector [ i ] ) ) ;
}
if ( m_inputFactorOrder . empty ( ) ) {
UserMessage : : Add ( string ( " no input factor specified in config file " ) ) ;
return false ;
}
//output factors
const vector < string > & outputFactorVector = m_parameter - > GetParam ( " output-factors " ) ;
for ( size_t i = 0 ; i < outputFactorVector . size ( ) ; i + + ) {
m_outputFactorOrder . push_back ( Scan < FactorType > ( outputFactorVector [ i ] ) ) ;
}
if ( m_outputFactorOrder . empty ( ) ) {
// default. output factor 0
m_outputFactorOrder . push_back ( 0 ) ;
}
//source word deletion
SetBooleanParameter ( & m_wordDeletionEnabled , " phrase-drop-allowed " , false ) ;
2008-06-11 14:52:57 +04:00
2010-03-07 10:57:48 +03:00
//Disable discarding
SetBooleanParameter ( & m_disableDiscarding , " disable-discarding " , false ) ;
2011-02-24 16:14:42 +03:00
2010-03-07 10:57:48 +03:00
//Print All Derivations
SetBooleanParameter ( & m_printAllDerivations , " print-all-derivations " , false ) ;
2011-02-24 16:14:42 +03:00
// additional output
if ( m_parameter - > isParamSpecified ( " translation-details " ) ) {
2010-05-08 19:51:59 +04:00
const vector < string > & args = m_parameter - > GetParam ( " translation-details " ) ;
2011-02-24 16:14:42 +03:00
if ( args . size ( ) = = 1 ) {
2010-05-08 19:51:59 +04:00
m_detailedTranslationReportingFilePath = args [ 0 ] ;
2011-02-24 16:14:42 +03:00
} else {
2010-05-08 19:51:59 +04:00
UserMessage : : Add ( string ( " the translation-details option requires exactly one filename argument " ) ) ;
return false ;
}
}
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
// word penalties
2010-08-10 17:12:00 +04:00
for ( size_t i = 0 ; i < m_parameter - > GetParam ( " weight-w " ) . size ( ) ; + + i ) {
float weightWordPenalty = Scan < float > ( m_parameter - > GetParam ( " weight-w " ) [ i ] ) ;
2010-10-07 02:06:49 +04:00
m_wordPenaltyProducers . push_back ( new WordPenaltyProducer ( ) ) ;
SetWeight ( m_wordPenaltyProducers . back ( ) , weightWordPenalty ) ;
2010-08-10 17:12:00 +04:00
}
2011-02-24 16:14:42 +03:00
float weightUnknownWord = ( m_parameter - > GetParam ( " weight-u " ) . size ( ) > 0 ) ? Scan < float > ( m_parameter - > GetParam ( " weight-u " ) [ 0 ] ) : 1 ;
2011-08-19 20:09:36 +04:00
m_unknownWordPenaltyProducer = new UnknownWordPenaltyProducer ( ) ;
2010-10-07 02:06:49 +04:00
SetWeight ( m_unknownWordPenaltyProducer , weightUnknownWord ) ;
2011-02-24 16:14:42 +03:00
// reordering constraints
m_maxDistortion = ( m_parameter - > GetParam ( " distortion-limit " ) . size ( ) > 0 ) ?
Scan < int > ( m_parameter - > GetParam ( " distortion-limit " ) [ 0 ] )
: - 1 ;
SetBooleanParameter ( & m_reorderingConstraint , " monotone-at-punctuation " , false ) ;
// settings for pruning
m_maxHypoStackSize = ( m_parameter - > GetParam ( " stack " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " stack " ) [ 0 ] ) : DEFAULT_MAX_HYPOSTACK_SIZE ;
2012-04-29 08:37:48 +04:00
std : : cerr < < " max stack size: " < < m_maxHypoStackSize < < std : : endl ;
2011-02-24 16:14:42 +03:00
m_minHypoStackDiversity = 0 ;
if ( m_parameter - > GetParam ( " stack-diversity " ) . size ( ) > 0 ) {
if ( m_maxDistortion > 15 ) {
UserMessage : : Add ( " stack diversity > 0 is not allowed for distortion limits larger than 15 " ) ;
return false ;
}
if ( m_inputType = = WordLatticeInput ) {
UserMessage : : Add ( " stack diversity > 0 is not allowed for lattice input " ) ;
return false ;
}
m_minHypoStackDiversity = Scan < size_t > ( m_parameter - > GetParam ( " stack-diversity " ) [ 0 ] ) ;
}
m_beamWidth = ( m_parameter - > GetParam ( " beam-threshold " ) . size ( ) > 0 ) ?
TransformScore ( Scan < float > ( m_parameter - > GetParam ( " beam-threshold " ) [ 0 ] ) )
: TransformScore ( DEFAULT_BEAM_WIDTH ) ;
m_earlyDiscardingThreshold = ( m_parameter - > GetParam ( " early-discarding-threshold " ) . size ( ) > 0 ) ?
TransformScore ( Scan < float > ( m_parameter - > GetParam ( " early-discarding-threshold " ) [ 0 ] ) )
: TransformScore ( DEFAULT_EARLY_DISCARDING_THRESHOLD ) ;
m_translationOptionThreshold = ( m_parameter - > GetParam ( " translation-option-threshold " ) . size ( ) > 0 ) ?
TransformScore ( Scan < float > ( m_parameter - > GetParam ( " translation-option-threshold " ) [ 0 ] ) )
: TransformScore ( DEFAULT_TRANSLATION_OPTION_THRESHOLD ) ;
2012-04-29 08:37:48 +04:00
std : : cerr < < " beamwidth: " < < m_beamWidth < < std : : endl ;
std : : cerr < < " early discarding threshold: " < < m_earlyDiscardingThreshold < < std : : endl ;
std : : cerr < < " translOptThreshold: " < < m_translationOptionThreshold < < std : : endl ;
2011-02-24 16:14:42 +03:00
m_maxNoTransOptPerCoverage = ( m_parameter - > GetParam ( " max-trans-opt-per-coverage " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " max-trans-opt-per-coverage " ) [ 0 ] ) : DEFAULT_MAX_TRANS_OPT_SIZE ;
m_maxNoPartTransOpt = ( m_parameter - > GetParam ( " max-partial-trans-opt " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " max-partial-trans-opt " ) [ 0 ] ) : DEFAULT_MAX_PART_TRANS_OPT_SIZE ;
m_maxPhraseLength = ( m_parameter - > GetParam ( " max-phrase-length " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " max-phrase-length " ) [ 0 ] ) : DEFAULT_MAX_PHRASE_LENGTH ;
m_cubePruningPopLimit = ( m_parameter - > GetParam ( " cube-pruning-pop-limit " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " cube-pruning-pop-limit " ) [ 0 ] ) : DEFAULT_CUBE_PRUNING_POP_LIMIT ;
m_cubePruningDiversity = ( m_parameter - > GetParam ( " cube-pruning-diversity " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " cube-pruning-diversity " ) [ 0 ] ) : DEFAULT_CUBE_PRUNING_DIVERSITY ;
2011-06-27 19:13:15 +04:00
SetBooleanParameter ( & m_cubePruningLazyScoring , " cube-pruning-lazy-scoring " , false ) ;
2011-02-24 16:14:42 +03:00
// unknown word processing
SetBooleanParameter ( & m_dropUnknown , " drop-unknown " , false ) ;
2011-09-09 22:03:00 +04:00
SetBooleanParameter ( & m_lmEnableOOVFeature , " lmodel-oov-feature " , false ) ;
2011-02-24 16:14:42 +03:00
// minimum Bayes risk decoding
SetBooleanParameter ( & m_mbr , " minimum-bayes-risk " , false ) ;
2010-02-03 13:23:32 +03:00
m_mbrSize = ( m_parameter - > GetParam ( " mbr-size " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < size_t > ( m_parameter - > GetParam ( " mbr-size " ) [ 0 ] ) : 200 ;
m_mbrScale = ( m_parameter - > GetParam ( " mbr-scale " ) . size ( ) > 0 ) ?
Scan < float > ( m_parameter - > GetParam ( " mbr-scale " ) [ 0 ] ) : 1.0f ;
2008-06-11 14:52:57 +04:00
2010-02-03 13:23:32 +03:00
//lattice mbr
SetBooleanParameter ( & m_useLatticeMBR , " lminimum-bayes-risk " , false ) ;
2010-04-12 13:51:29 +04:00
if ( m_useLatticeMBR & & m_mbr ) {
2011-02-24 16:14:42 +03:00
cerr < < " Errror: Cannot use both n-best mbr and lattice mbr together " < < endl ;
exit ( 1 ) ;
2010-04-12 13:51:29 +04:00
}
2012-05-28 10:03:45 +04:00
//mira training
SetBooleanParameter ( & m_mira , " mira " , false ) ;
2011-02-24 16:14:42 +03:00
2010-04-12 13:51:29 +04:00
if ( m_useLatticeMBR ) m_mbr = true ;
2011-02-24 16:14:42 +03:00
2010-02-09 14:37:33 +03:00
m_lmbrPruning = ( m_parameter - > GetParam ( " lmbr-pruning-factor " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < size_t > ( m_parameter - > GetParam ( " lmbr-pruning-factor " ) [ 0 ] ) : 30 ;
2010-02-03 13:23:32 +03:00
m_lmbrThetas = Scan < float > ( m_parameter - > GetParam ( " lmbr-thetas " ) ) ;
2010-02-03 14:20:20 +03:00
SetBooleanParameter ( & m_useLatticeHypSetForLatticeMBR , " lattice-hypo-set " , false ) ;
2010-02-03 22:46:35 +03:00
m_lmbrPrecision = ( m_parameter - > GetParam ( " lmbr-p " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < float > ( m_parameter - > GetParam ( " lmbr-p " ) [ 0 ] ) : 0.8f ;
2010-02-03 22:46:35 +03:00
m_lmbrPRatio = ( m_parameter - > GetParam ( " lmbr-r " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < float > ( m_parameter - > GetParam ( " lmbr-r " ) [ 0 ] ) : 0.6f ;
2010-03-14 23:23:17 +03:00
m_lmbrMapWeight = ( m_parameter - > GetParam ( " lmbr-map-weight " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < float > ( m_parameter - > GetParam ( " lmbr-map-weight " ) [ 0 ] ) : 0.0f ;
//consensus decoding
2010-04-12 13:51:29 +04:00
SetBooleanParameter ( & m_useConsensusDecoding , " consensus-decoding " , false ) ;
if ( m_useConsensusDecoding & & m_mbr ) {
2011-02-24 16:14:42 +03:00
cerr < < " Error: Cannot use consensus decoding together with mbr " < < endl ;
exit ( 1 ) ;
2010-04-12 13:51:29 +04:00
}
2011-02-24 16:14:42 +03:00
if ( m_useConsensusDecoding ) m_mbr = true ;
m_timeout_threshold = ( m_parameter - > GetParam ( " time-out " ) . size ( ) > 0 ) ?
Scan < size_t > ( m_parameter - > GetParam ( " time-out " ) [ 0 ] ) : - 1 ;
m_timeout = ( GetTimeoutThreshold ( ) = = ( size_t ) - 1 ) ? false : true ;
2008-06-11 14:52:57 +04:00
2010-04-23 19:01:06 +04:00
m_lmcache_cleanup_threshold = ( m_parameter - > GetParam ( " clean-lm-cache " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < size_t > ( m_parameter - > GetParam ( " clean-lm-cache " ) [ 0 ] ) : 1 ;
2010-04-23 19:01:06 +04:00
2011-09-23 02:29:56 +04:00
m_threadCount = 1 ;
const std : : vector < std : : string > & threadInfo = m_parameter - > GetParam ( " threads " ) ;
if ( ! threadInfo . empty ( ) ) {
if ( threadInfo [ 0 ] = = " all " ) {
# ifdef WITH_THREADS
m_threadCount = boost : : thread : : hardware_concurrency ( ) ;
if ( ! m_threadCount ) {
UserMessage : : Add ( " -threads all specified but Boost doesn't know how many cores there are " ) ;
return false ;
}
# else
UserMessage : : Add ( " -threads all specified but moses not built with thread support " ) ;
return false ;
# endif
} else {
m_threadCount = Scan < int > ( threadInfo [ 0 ] ) ;
if ( m_threadCount < 1 ) {
UserMessage : : Add ( " Specify at least one thread. " ) ;
return false ;
}
# ifndef WITH_THREADS
if ( m_threadCount > 1 ) {
UserMessage : : Add ( std : : string ( " Error: Thread count of " ) + threadInfo [ 0 ] + " but moses not built with thread support " ) ;
return false ;
}
# endif
}
}
2011-11-13 21:14:40 +04:00
m_startTranslationId = ( m_parameter - > GetParam ( " start-translation-id " ) . size ( ) > 0 ) ?
Scan < long > ( m_parameter - > GetParam ( " start-translation-id " ) [ 0 ] ) : 0 ;
2011-02-24 16:14:42 +03:00
// Read in constraint decoding file, if provided
if ( m_parameter - > GetParam ( " constraint " ) . size ( ) ) {
if ( m_parameter - > GetParam ( " search-algorithm " ) . size ( ) > 0
& & Scan < size_t > ( m_parameter - > GetParam ( " search-algorithm " ) [ 0 ] ) ! = 0 ) {
2010-01-29 20:11:34 +03:00
cerr < < " Can use -constraint only with stack-based search (-search-algorithm 0) " < < endl ;
exit ( 1 ) ;
}
2011-02-24 16:14:42 +03:00
m_constraintFileName = m_parameter - > GetParam ( " constraint " ) [ 0 ] ;
InputFileStream constraintFile ( m_constraintFileName ) ;
std : : string line ;
2011-11-13 21:14:40 +04:00
long sentenceID = GetStartTranslationId ( ) - 1 ;
2011-02-24 16:14:42 +03:00
while ( getline ( constraintFile , line ) ) {
vector < string > vecStr = Tokenize ( line , " \t " ) ;
if ( vecStr . size ( ) = = 1 ) {
sentenceID + + ;
2011-11-21 14:49:26 +04:00
Phrase phrase ( 0 ) ;
2011-02-24 16:14:42 +03:00
phrase . CreateFromString ( GetOutputFactorOrder ( ) , vecStr [ 0 ] , GetFactorDelimiter ( ) ) ;
m_constraints . insert ( make_pair ( sentenceID , phrase ) ) ;
} else if ( vecStr . size ( ) = = 2 ) {
sentenceID = Scan < long > ( vecStr [ 0 ] ) ;
2011-11-21 14:49:26 +04:00
Phrase phrase ( 0 ) ;
2011-02-24 16:14:42 +03:00
phrase . CreateFromString ( GetOutputFactorOrder ( ) , vecStr [ 1 ] , GetFactorDelimiter ( ) ) ;
m_constraints . insert ( make_pair ( sentenceID , phrase ) ) ;
} else {
2011-11-18 16:07:41 +04:00
CHECK ( false ) ;
2011-02-24 16:14:42 +03:00
}
}
}
// use of xml in input
if ( m_parameter - > GetParam ( " xml-input " ) . size ( ) = = 0 ) m_xmlInputType = XmlPassThrough ;
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " exclusive " ) m_xmlInputType = XmlExclusive ;
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " inclusive " ) m_xmlInputType = XmlInclusive ;
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " ignore " ) m_xmlInputType = XmlIgnore ;
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " pass-through " ) m_xmlInputType = XmlPassThrough ;
else {
UserMessage : : Add ( " invalid xml-input value, must be pass-through, exclusive, inclusive, or ignore " ) ;
return false ;
}
2011-11-16 16:38:22 +04:00
// specify XML tags opening and closing brackets for XML option
if ( m_parameter - > GetParam ( " xml-brackets " ) . size ( ) > 0 ) {
std : : vector < std : : string > brackets = Tokenize ( m_parameter - > GetParam ( " xml-brackets " ) [ 0 ] ) ;
if ( brackets . size ( ) ! = 2 ) {
cerr < < " invalid xml-brackets value, must specify exactly 2 blank-delimited strings for XML tags opening and closing brackets " < < endl ;
exit ( 1 ) ;
}
m_xmlBrackets . first = brackets [ 0 ] ;
m_xmlBrackets . second = brackets [ 1 ] ;
cerr < < " XML tags opening and closing brackets for XML input are: " < < m_xmlBrackets . first < < " and " < < m_xmlBrackets . second < < endl ;
}
2011-05-13 23:28:23 +04:00
# ifdef HAVE_SYNLM
if ( m_parameter - > GetParam ( " slmodel-file " ) . size ( ) > 0 ) {
if ( ! LoadSyntacticLanguageModel ( ) ) return false ;
}
# endif
2011-02-24 16:14:42 +03:00
if ( ! LoadLexicalReorderingModel ( ) ) return false ;
if ( ! LoadLanguageModels ( ) ) return false ;
if ( ! LoadGenerationTables ( ) ) return false ;
if ( ! LoadPhraseTables ( ) ) return false ;
if ( ! LoadGlobalLexicalModel ( ) ) return false ;
2012-01-31 14:31:39 +04:00
if ( ! LoadGlobalLexicalModelUnlimited ( ) ) return false ;
2010-09-14 20:25:33 +04:00
if ( ! LoadDecodeGraphs ( ) ) return false ;
if ( ! LoadReferences ( ) ) return false ;
2010-10-15 19:19:17 +04:00
if ( ! LoadDiscrimLMFeature ( ) ) return false ;
2011-03-22 17:33:16 +03:00
if ( ! LoadPhrasePairFeature ( ) ) return false ;
2011-05-11 02:02:25 +04:00
if ( ! LoadPhraseBoundaryFeature ( ) ) return false ;
2011-08-06 18:10:43 +04:00
if ( ! LoadPhraseLengthFeature ( ) ) return false ;
2011-08-13 04:25:23 +04:00
if ( ! LoadTargetWordInsertionFeature ( ) ) return false ;
2011-08-13 05:39:35 +04:00
if ( ! LoadSourceWordDeletionFeature ( ) ) return false ;
2011-08-13 06:40:54 +04:00
if ( ! LoadWordTranslationFeature ( ) ) return false ;
2010-08-10 17:12:00 +04:00
2011-08-07 04:58:56 +04:00
// report individual sparse features in n-best list
if ( m_parameter - > GetParam ( " report-sparse-features " ) . size ( ) > 0 ) {
for ( size_t i = 0 ; i < m_parameter - > GetParam ( " report-sparse-features " ) . size ( ) ; i + + ) {
const std : : string & name = m_parameter - > GetParam ( " report-sparse-features " ) [ i ] ;
2011-09-20 14:23:38 +04:00
if ( m_targetBigramFeature & & name . compare ( m_targetBigramFeature - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
2011-08-07 05:57:41 +04:00
m_targetBigramFeature - > SetSparseFeatureReporting ( ) ;
2011-11-22 16:15:15 +04:00
if ( m_targetNgramFeatures . size ( ) > 0 )
for ( size_t i = 0 ; i < m_targetNgramFeatures . size ( ) ; + + i )
if ( name . compare ( m_targetNgramFeatures [ i ] - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
m_targetNgramFeatures [ i ] - > SetSparseFeatureReporting ( ) ;
2011-09-20 14:23:38 +04:00
if ( m_phrasePairFeature & & name . compare ( m_phrasePairFeature - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
2011-08-07 05:57:41 +04:00
m_phrasePairFeature - > SetSparseFeatureReporting ( ) ;
2011-09-20 14:23:38 +04:00
if ( m_phraseBoundaryFeature & & name . compare ( m_phraseBoundaryFeature - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
2011-08-07 05:57:41 +04:00
m_phraseBoundaryFeature - > SetSparseFeatureReporting ( ) ;
2011-09-20 14:23:38 +04:00
if ( m_phraseLengthFeature & & name . compare ( m_phraseLengthFeature - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
2011-08-07 04:58:56 +04:00
m_phraseLengthFeature - > SetSparseFeatureReporting ( ) ;
2011-09-20 14:23:38 +04:00
if ( m_targetWordInsertionFeature & & name . compare ( m_targetWordInsertionFeature - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
2011-08-13 04:25:23 +04:00
m_targetWordInsertionFeature - > SetSparseFeatureReporting ( ) ;
2011-09-20 14:23:38 +04:00
if ( m_sourceWordDeletionFeature & & name . compare ( m_sourceWordDeletionFeature - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
2011-08-13 05:39:35 +04:00
m_sourceWordDeletionFeature - > SetSparseFeatureReporting ( ) ;
2011-09-20 14:23:38 +04:00
if ( m_wordTranslationFeature & & name . compare ( m_wordTranslationFeature - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 )
2011-08-13 06:40:54 +04:00
m_wordTranslationFeature - > SetSparseFeatureReporting ( ) ;
2011-09-21 01:25:56 +04:00
for ( size_t j = 0 ; j < m_sparsePhraseDictionary . size ( ) ; + + j ) {
if ( m_sparsePhraseDictionary [ j ] & & name . compare ( m_sparsePhraseDictionary [ j ] - > GetScoreProducerWeightShortName ( 0 ) ) = = 0 ) {
m_sparsePhraseDictionary [ j ] - > SetSparseFeatureReporting ( ) ;
}
}
2011-08-07 04:58:56 +04:00
}
}
2010-08-10 17:12:00 +04:00
//configure the translation systems with these tables
vector < string > tsConfig = m_parameter - > GetParam ( " translation-systems " ) ;
if ( ! tsConfig . size ( ) ) {
//use all models in default system.
2010-09-07 14:54:04 +04:00
tsConfig . push_back ( TranslationSystem : : DEFAULT + " R * D * L * G * " ) ;
2010-08-10 17:12:00 +04:00
}
2011-02-24 16:14:42 +03:00
2010-08-10 17:12:00 +04:00
if ( m_wordPenaltyProducers . size ( ) ! = tsConfig . size ( ) ) {
UserMessage : : Add ( string ( " Mismatch between number of word penalties and number of translation systems " ) ) ;
return false ;
}
2011-02-24 16:14:42 +03:00
if ( m_searchAlgorithm = = ChartDecoding ) {
//insert some null distortion score producers
m_distortionScoreProducers . assign ( tsConfig . size ( ) , NULL ) ;
} else {
if ( m_distortionScoreProducers . size ( ) ! = tsConfig . size ( ) ) {
2011-07-21 06:41:23 +04:00
UserMessage : : Add ( string ( " Mismatch between number of distortion scores and number of translation systems. Or [search-algorithm] has been set to a phrase-based algorithm when it should be chart decoding " ) ) ;
2011-02-24 16:14:42 +03:00
return false ;
2010-08-10 17:12:00 +04:00
}
2011-02-24 16:14:42 +03:00
}
2010-08-10 17:12:00 +04:00
for ( size_t i = 0 ; i < tsConfig . size ( ) ; + + i ) {
vector < string > config = Tokenize ( tsConfig [ i ] ) ;
if ( config . size ( ) % 2 ! = 1 ) {
UserMessage : : Add ( string ( " Incorrect number of fields in Translation System config. Should be an odd number " ) ) ;
}
m_translationSystems . insert ( pair < string , TranslationSystem > ( config [ 0 ] ,
2011-02-24 16:14:42 +03:00
TranslationSystem ( config [ 0 ] , m_wordPenaltyProducers [ i ] , m_unknownWordPenaltyProducer , m_distortionScoreProducers [ i ] ) ) ) ;
2010-08-10 17:12:00 +04:00
for ( size_t j = 1 ; j < config . size ( ) ; j + = 2 ) {
const string & id = config [ j ] ;
const string & tables = config [ j + 1 ] ;
set < size_t > tableIds ;
if ( tables ! = " * " ) {
//selected tables
vector < string > tableIdStrings = Tokenize ( tables , " , " ) ;
vector < size_t > tableIdList ;
Scan < size_t > ( tableIdList , tableIdStrings ) ;
copy ( tableIdList . begin ( ) , tableIdList . end ( ) , inserter ( tableIds , tableIds . end ( ) ) ) ;
}
if ( id = = " D " ) {
for ( size_t k = 0 ; k < m_decodeGraphs . size ( ) ; + + k ) {
if ( ! tableIds . size ( ) | | tableIds . find ( k ) ! = tableIds . end ( ) ) {
VERBOSE ( 2 , " Adding decoder graph " < < k < < " to translation system " < < config [ 0 ] < < endl ) ;
2011-08-18 01:13:21 +04:00
m_translationSystems . find ( config [ 0 ] ) - > second . AddDecodeGraph ( m_decodeGraphs [ k ] , m_decodeGraphBackoff [ k ] ) ;
2010-08-10 17:12:00 +04:00
}
}
} else if ( id = = " R " ) {
for ( size_t k = 0 ; k < m_reorderModels . size ( ) ; + + k ) {
if ( ! tableIds . size ( ) | | tableIds . find ( k ) ! = tableIds . end ( ) ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddReorderModel ( m_reorderModels [ k ] ) ;
VERBOSE ( 2 , " Adding reorder table " < < k < < " to translation system " < < config [ 0 ] < < endl ) ;
}
}
} else if ( id = = " G " ) {
for ( size_t k = 0 ; k < m_globalLexicalModels . size ( ) ; + + k ) {
if ( ! tableIds . size ( ) | | tableIds . find ( k ) ! = tableIds . end ( ) ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddGlobalLexicalModel ( m_globalLexicalModels [ k ] ) ;
VERBOSE ( 2 , " Adding global lexical model " < < k < < " to translation system " < < config [ 0 ] < < endl ) ;
}
}
} else if ( id = = " L " ) {
size_t lmid = 0 ;
for ( LMList : : const_iterator k = m_languageModel . begin ( ) ; k ! = m_languageModel . end ( ) ; + + k , + + lmid ) {
if ( ! tableIds . size ( ) | | tableIds . find ( lmid ) ! = tableIds . end ( ) ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddLanguageModel ( * k ) ;
VERBOSE ( 2 , " Adding language model " < < lmid < < " to translation system " < < config [ 0 ] < < endl ) ;
}
}
} else {
UserMessage : : Add ( string ( " Incorrect translation system identifier: " ) + id ) ;
return false ;
}
}
//Instigate dictionary loading
m_translationSystems . find ( config [ 0 ] ) - > second . ConfigDictionaries ( ) ;
//Add any other features here.
2010-09-14 20:25:33 +04:00
if ( m_bleuScoreFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_bleuScoreFeature ) ;
}
2010-10-15 19:19:17 +04:00
if ( m_targetBigramFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_targetBigramFeature ) ;
}
2011-11-22 16:15:15 +04:00
if ( m_targetNgramFeatures . size ( ) > 0 ) {
2012-01-31 14:31:39 +04:00
for ( size_t i = 0 ; i < m_targetNgramFeatures . size ( ) ; + + i )
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_targetNgramFeatures [ i ] ) ;
2011-11-04 20:40:12 +04:00
}
2011-03-22 17:33:16 +03:00
if ( m_phrasePairFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_phrasePairFeature ) ;
}
2011-05-11 02:02:25 +04:00
if ( m_phraseBoundaryFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_phraseBoundaryFeature ) ;
}
2011-08-06 18:10:43 +04:00
if ( m_phraseLengthFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_phraseLengthFeature ) ;
}
2011-08-13 04:25:23 +04:00
if ( m_targetWordInsertionFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_targetWordInsertionFeature ) ;
}
2011-08-13 05:39:35 +04:00
if ( m_sourceWordDeletionFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_sourceWordDeletionFeature ) ;
}
2011-08-13 06:40:54 +04:00
if ( m_wordTranslationFeature ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_wordTranslationFeature ) ;
}
2011-05-24 19:47:35 +04:00
# ifdef HAVE_SYNLM
if ( m_syntacticLanguageModel ! = NULL ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_syntacticLanguageModel ) ;
}
# endif
2011-09-21 01:25:56 +04:00
for ( size_t i = 0 ; i < m_sparsePhraseDictionary . size ( ) ; + + i ) {
if ( m_sparsePhraseDictionary [ i ] ) {
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_sparsePhraseDictionary [ i ] ) ;
}
}
2012-01-31 14:31:39 +04:00
if ( m_globalLexicalModelsUnlimited . size ( ) > 0 ) {
for ( size_t i = 0 ; i < m_globalLexicalModelsUnlimited . size ( ) ; + + i )
m_translationSystems . find ( config [ 0 ] ) - > second . AddFeatureFunction ( m_globalLexicalModelsUnlimited [ i ] ) ;
}
2010-08-10 17:12:00 +04:00
}
2008-06-11 14:52:57 +04:00
2010-10-15 19:19:17 +04:00
//Load extra feature weights
//NB: These are common to all translation systems (at the moment!)
vector < string > extraWeightConfig = m_parameter - > GetParam ( " weight-file " ) ;
2011-08-19 20:09:36 +04:00
if ( extraWeightConfig . size ( ) ) {
2012-01-31 14:31:39 +04:00
if ( extraWeightConfig . size ( ) ! = 1 ) {
UserMessage : : Add ( " One argument should be supplied for weight-file " ) ;
return false ;
}
ScoreComponentCollection extraWeights ;
if ( ! extraWeights . Load ( extraWeightConfig [ 0 ] ) ) {
UserMessage : : Add ( " Unable to load weights from " + extraWeightConfig [ 0 ] ) ;
return false ;
}
2012-02-01 18:05:49 +04:00
// DLM: apply additional weight to sparse features if applicable
2011-11-24 23:27:12 +04:00
for ( size_t i = 0 ; i < m_targetNgramFeatures . size ( ) ; + + i ) {
2012-03-15 04:32:27 +04:00
float weight = m_targetNgramFeatures [ i ] - > GetSparseProducerWeight ( ) ;
if ( weight ! = 1 ) {
extraWeights . MultiplyEquals ( m_targetNgramFeatures [ i ] , weight ) ;
cerr < < " Set dlm sparse producer weight: " < < weight < < endl ;
}
2011-11-24 23:27:12 +04:00
}
2012-02-01 18:05:49 +04:00
// GLM: apply additional weight to sparse features if applicable
for ( size_t i = 0 ; i < m_globalLexicalModelsUnlimited . size ( ) ; + + i ) {
2012-03-15 04:32:27 +04:00
float weight = m_globalLexicalModelsUnlimited [ i ] - > GetSparseProducerWeight ( ) ;
if ( weight ! = 1 ) {
extraWeights . MultiplyEquals ( m_globalLexicalModelsUnlimited [ i ] , weight ) ;
cerr < < " Set glm sparse producer weight: " < < weight < < endl ;
}
2012-01-31 14:31:39 +04:00
}
2012-03-15 04:32:27 +04:00
// WT: apply additional weight to sparse features if applicable
if ( m_wordTranslationFeature ) {
float weight = m_wordTranslationFeature - > GetSparseProducerWeight ( ) ;
if ( weight ! = 1 ) {
extraWeights . MultiplyEquals ( m_wordTranslationFeature , weight ) ;
cerr < < " Set wt sparse producer weight: " < < weight < < endl ;
}
}
// PP: apply additional weight to sparse features if applicable
if ( m_phrasePairFeature ) {
float weight = m_phrasePairFeature - > GetSparseProducerWeight ( ) ;
if ( weight ! = 1 ) {
extraWeights . MultiplyEquals ( m_phrasePairFeature , weight ) ;
cerr < < " Set pp sparse producer weight: " < < weight < < endl ;
}
}
// PB: apply additional weight to sparse features if applicable
if ( m_phraseBoundaryFeature ) {
float weight = m_phraseBoundaryFeature - > GetSparseProducerWeight ( ) ;
if ( weight ! = 1 ) {
extraWeights . MultiplyEquals ( m_phraseBoundaryFeature , weight ) ;
cerr < < " Set pb sparse producer weight: " < < weight < < endl ;
}
}
m_allWeights . PlusEquals ( extraWeights ) ;
2012-02-01 18:05:49 +04:00
}
2011-02-24 16:14:42 +03:00
return true ;
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
void StaticData : : SetBooleanParameter ( bool * parameter , string parameterName , bool defaultValue )
2008-06-11 14:52:57 +04:00
{
// default value if nothing is specified
* parameter = defaultValue ;
2011-02-24 16:14:42 +03:00
if ( ! m_parameter - > isParamSpecified ( parameterName ) ) {
2008-06-11 14:52:57 +04:00
return ;
}
// if parameter is just specified as, e.g. "-parameter" set it true
2011-02-24 16:14:42 +03:00
if ( m_parameter - > GetParam ( parameterName ) . size ( ) = = 0 ) {
2008-06-11 14:52:57 +04:00
* parameter = true ;
}
// if paramter is specified "-parameter true" or "-parameter false"
2011-02-24 16:14:42 +03:00
else if ( m_parameter - > GetParam ( parameterName ) . size ( ) = = 1 ) {
2008-06-11 14:52:57 +04:00
* parameter = Scan < bool > ( m_parameter - > GetParam ( parameterName ) [ 0 ] ) ;
}
}
2010-10-07 02:06:49 +04:00
void StaticData : : SetWeight ( const ScoreProducer * sp , float weight )
{
2011-11-09 21:16:02 +04:00
m_allWeights . Resize ( ) ;
2010-10-07 02:06:49 +04:00
m_allWeights . Assign ( sp , weight ) ;
}
void StaticData : : SetWeights ( const ScoreProducer * sp , const std : : vector < float > & weights )
{
2011-11-09 21:16:02 +04:00
m_allWeights . Resize ( ) ;
2010-10-07 02:06:49 +04:00
m_allWeights . Assign ( sp , weights ) ;
}
2008-06-11 14:52:57 +04:00
StaticData::~StaticData()
{
  // Pointer collections: RemoveAllInColl deletes every element.
  RemoveAllInColl(m_sparsePhraseDictionary);
  RemoveAllInColl(m_phraseDictionary);
  RemoveAllInColl(m_generationDictionary);
  RemoveAllInColl(m_reorderModels);
  RemoveAllInColl(m_globalLexicalModels);
#ifdef HAVE_SYNLM
  delete m_syntacticLanguageModel;
#endif
  RemoveAllInColl(m_decodeGraphs);
  RemoveAllInColl(m_wordPenaltyProducers);
  RemoveAllInColl(m_distortionScoreProducers);
  m_languageModel.CleanUp();

  // Cached translation options.
  ClearTransOptionCache();

  // Individually owned score producers (delete on NULL is a no-op).
  delete m_unknownWordPenaltyProducer;
  delete m_targetBigramFeature;
  for (size_t k = 0; k < m_targetNgramFeatures.size(); ++k)
    delete m_targetNgramFeatures[k];
  delete m_phrasePairFeature;
  delete m_phraseBoundaryFeature;
  delete m_phraseLengthFeature;
  delete m_targetWordInsertionFeature;
  delete m_sourceWordDeletionFeature;
  delete m_wordTranslationFeature;
  for (size_t k = 0; k < m_globalLexicalModelsUnlimited.size(); ++k)
    delete m_globalLexicalModelsUnlimited[k];

  //delete m_parameter;

  // memory pools
  Phrase::FinalizeMemPool();
}
2011-05-13 23:28:23 +04:00
# ifdef HAVE_SYNLM
bool StaticData : : LoadSyntacticLanguageModel ( ) {
cerr < < " Loading syntactic language models... " < < std : : endl ;
const vector < float > weights = Scan < float > ( m_parameter - > GetParam ( " weight-slm " ) ) ;
const vector < string > files = m_parameter - > GetParam ( " slmodel-file " ) ;
const FactorType factorType = ( m_parameter - > GetParam ( " slmodel-factor " ) . size ( ) > 0 ) ?
TransformScore ( Scan < int > ( m_parameter - > GetParam ( " slmodel-factor " ) [ 0 ] ) )
: 0 ;
const size_t beamWidth = ( m_parameter - > GetParam ( " slmodel-beam " ) . size ( ) > 0 ) ?
TransformScore ( Scan < int > ( m_parameter - > GetParam ( " slmodel-beam " ) [ 0 ] ) )
: 500 ;
if ( files . size ( ) < 1 ) {
cerr < < " No syntactic language model files specified! " < < std : : endl ;
return false ;
}
// check if feature is used
if ( weights . size ( ) > = 1 ) {
//cout.setf(ios::scientific,ios::floatfield);
//cerr.setf(ios::scientific,ios::floatfield);
// create the feature
m_syntacticLanguageModel = new SyntacticLanguageModel ( files , weights , factorType , beamWidth ) ;
/*
/////////////////////////////////////////
// BEGIN LANE's UNSTABLE EXPERIMENT :)
//
double ppl = m_syntacticLanguageModel - > perplexity ( ) ;
cerr < < " Probability is " < < ppl < < endl ;
//
// END LANE's UNSTABLE EXPERIMENT
/////////////////////////////////////////
*/
if ( m_syntacticLanguageModel = = NULL ) {
return false ;
}
}
return true ;
}
# endif
2008-06-11 14:52:57 +04:00
bool StaticData : : LoadLexicalReorderingModel ( )
{
2011-02-24 16:14:42 +03:00
VERBOSE ( 1 , " Loading lexical distortion models... " ) ;
const vector < string > fileStr = m_parameter - > GetParam ( " distortion-file " ) ;
bool hasWeightlr = ( m_parameter - > GetParam ( " weight-lr " ) . size ( ) ! = 0 ) ;
vector < string > weightsStr ;
if ( hasWeightlr ) {
weightsStr = m_parameter - > GetParam ( " weight-lr " ) ;
} else {
weightsStr = m_parameter - > GetParam ( " weight-d " ) ;
}
std : : vector < float > weights ;
size_t w = 1 ; //cur weight
if ( hasWeightlr ) {
w = 0 ; // if reading from weight-lr, don't have to count first as distortion penalty
}
size_t f = 0 ; //cur file
//get weights values
VERBOSE ( 1 , " have " < < fileStr . size ( ) < < " models " < < std : : endl ) ;
for ( size_t j = 0 ; j < weightsStr . size ( ) ; + + j ) {
weights . push_back ( Scan < float > ( weightsStr [ j ] ) ) ;
}
//load all models
for ( size_t i = 0 ; i < fileStr . size ( ) ; + + i ) {
vector < string > spec = Tokenize < string > ( fileStr [ f ] , " " ) ;
+ + f ; //mark file as consumed
if ( spec . size ( ) ! = 4 ) {
UserMessage : : Add ( " Invalid Lexical Reordering Model Specification: " + fileStr [ f ] ) ;
return false ;
2010-08-10 17:12:00 +04:00
}
2011-02-24 16:14:42 +03:00
// spec[0] = factor map
// spec[1] = name
// spec[2] = num weights
// spec[3] = fileName
// decode factor map
vector < FactorType > input , output ;
vector < string > inputfactors = Tokenize ( spec [ 0 ] , " - " ) ;
if ( inputfactors . size ( ) = = 2 ) {
input = Tokenize < FactorType > ( inputfactors [ 0 ] , " , " ) ;
output = Tokenize < FactorType > ( inputfactors [ 1 ] , " , " ) ;
} else if ( inputfactors . size ( ) = = 1 ) {
//if there is only one side assume it is on e side... why?
output = Tokenize < FactorType > ( inputfactors [ 0 ] , " , " ) ;
} else {
//format error
return false ;
2010-08-10 17:12:00 +04:00
}
2011-02-24 16:14:42 +03:00
string modelType = spec [ 1 ] ;
// decode num weights and fetch weights from array
std : : vector < float > mweights ;
size_t numWeights = atoi ( spec [ 2 ] . c_str ( ) ) ;
for ( size_t k = 0 ; k < numWeights ; + + k , + + w ) {
if ( w > = weights . size ( ) ) {
UserMessage : : Add ( " Lexicalized distortion model: Not enough weights, add to [weight-d] " ) ;
return false ;
} else
mweights . push_back ( weights [ w ] ) ;
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
string filePath = spec [ 3 ] ;
2011-11-09 01:22:34 +04:00
m_reorderModels . push_back ( new LexicalReordering ( input , output , LexicalReorderingConfiguration ( modelType ) , filePath , mweights ) ) ;
2011-02-24 16:14:42 +03:00
}
return true ;
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
2009-05-26 23:30:35 +04:00
bool StaticData : : LoadGlobalLexicalModel ( )
{
2011-02-24 16:14:42 +03:00
const vector < float > & weight = Scan < float > ( m_parameter - > GetParam ( " weight-lex " ) ) ;
const vector < string > & file = m_parameter - > GetParam ( " global-lexical-file " ) ;
if ( weight . size ( ) ! = file . size ( ) ) {
std : : cerr < < " number of weights and models for the global lexical model does not match ( "
< < weight . size ( ) < < " != " < < file . size ( ) < < " ) " < < std : : endl ;
return false ;
}
for ( size_t i = 0 ; i < weight . size ( ) ; i + + ) {
vector < string > spec = Tokenize < string > ( file [ i ] , " " ) ;
if ( spec . size ( ) ! = 2 ) {
std : : cerr < < " wrong global lexical model specification: " < < file [ i ] < < endl ;
return false ;
}
vector < string > factors = Tokenize ( spec [ 0 ] , " - " ) ;
if ( factors . size ( ) ! = 2 ) {
std : : cerr < < " wrong factor definition for global lexical model: " < < spec [ 0 ] < < endl ;
return false ;
}
vector < FactorType > inputFactors = Tokenize < FactorType > ( factors [ 0 ] , " , " ) ;
vector < FactorType > outputFactors = Tokenize < FactorType > ( factors [ 1 ] , " , " ) ;
2011-08-19 20:09:36 +04:00
m_globalLexicalModels . push_back ( new GlobalLexicalModel ( spec [ 1 ] , inputFactors , outputFactors ) ) ;
2010-10-07 02:06:49 +04:00
SetWeight ( m_globalLexicalModels . back ( ) , weight [ i ] ) ;
2011-02-24 16:14:42 +03:00
}
return true ;
2009-05-26 23:30:35 +04:00
}
2012-01-31 14:31:39 +04:00
/**
 * Create sparse (unlimited) global lexical models from [glm-feature], with
 * optional per-model sparse producer weights from [weight-glm].
 * Spec format: "<factor-src>-<factor-tgt> [ignore-punct] [use-bias]
 * [context-type] [filename-src filename-tgt]" (2, 3, 4 or 6 fields).
 * @return false on a count mismatch or malformed spec.
 */
bool StaticData::LoadGlobalLexicalModelUnlimited()
{
  const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-glm"));
  const vector<string> &modelSpec = m_parameter->GetParam("glm-feature");

  // Weights are optional, but if given there must be exactly one per model.
  if (weight.size() != 0 && weight.size() != modelSpec.size()) {
    std::cerr << "number of sparse producer weights and model specs for the global lexical model unlimited "
              "does not match (" << weight.size() << " != " << modelSpec.size() << ")" << std::endl;
    return false;
  }

  for (size_t i = 0; i < modelSpec.size(); i++) {
    bool ignorePunctuation = true, biasFeature = false, restricted = false;
    size_t context = 0;
    string filenameSource, filenameTarget;
    vector<string> factors;
    vector<string> spec = Tokenize(modelSpec[i], " ");

    // read optional punctuation and bias specifications
    if (spec.size() > 0) {
      // NOTE(review): a one-field spec (factors only) is rejected here even
      // though the else-branch below suggests it should be legal — confirm
      // whether size()==1 ought to be accepted.
      if (spec.size() != 2 && spec.size() != 3 && spec.size() != 4 && spec.size() != 6) {
        UserMessage::Add("Format of glm feature is <factor-src>-<factor-tgt> [ignore-punct] [use-bias] "
                         "[context-type] [filename-src filename-tgt]");
        return false;
      }
      factors = Tokenize(spec[0], "-");
      // Optional fields are positional: [1]=ignore-punct, [2]=use-bias,
      // [3]=context-type, [4],[5]=restriction word lists.
      if (spec.size() >= 2)
        ignorePunctuation = Scan<size_t>(spec[1]);
      if (spec.size() >= 3)
        biasFeature = Scan<size_t>(spec[2]);
      if (spec.size() >= 4)
        context = Scan<size_t>(spec[3]);
      if (spec.size() == 6) {
        filenameSource = spec[4];
        filenameTarget = spec[5];
        restricted = true;
      }
    }
    else
      // Empty spec string: fall back to treating the whole entry as factors.
      factors = Tokenize(modelSpec[i], "-");

    if (factors.size() != 2) {
      UserMessage::Add("Wrong factor definition for global lexical model unlimited: " + modelSpec[i]);
      return false;
    }

    const vector<FactorType> inputFactors = Tokenize<FactorType>(factors[0], ",");
    const vector<FactorType> outputFactors = Tokenize<FactorType>(factors[1], ",");
    GlobalLexicalModelUnlimited *glmu = new GlobalLexicalModelUnlimited(inputFactors, outputFactors, biasFeature, ignorePunctuation, context);
    m_globalLexicalModelsUnlimited.push_back(glmu);
    if (restricted) {
      // Restrict the feature to the word lists loaded from the two files.
      cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
      if (!glmu->Load(filenameSource, filenameTarget)) {
        UserMessage::Add("Unable to load word lists for word translation feature from files " + filenameSource + " and " + filenameTarget);
        return false;
      }
    }
    if (weight.size() > i)
      m_globalLexicalModelsUnlimited[i]->SetSparseProducerWeight(weight[i]);
  }
  return true;
}
2008-06-11 14:52:57 +04:00
/**
 * Load every language model listed in [lmodel-file], assigning weights from
 * [weight-l] and IRST dictionary upper bounds from [lmodel-dub].  Identical
 * file entries share one loaded model via Duplicate().  Sets m_fLMsLoaded on
 * success, since phrase-table loading requires LMs to be present.
 * @return false on a malformed spec or when no LM implementation is compiled in.
 */
bool StaticData::LoadLanguageModels()
{
  if (m_parameter->GetParam("lmodel-file").size() > 0) {
    // weights
    vector<float> weightAll = Scan<float>(m_parameter->GetParam("weight-l"));
    // dictionary upper-bounds fo all IRST LMs
    vector<int> LMdub = Scan<int>(m_parameter->GetParam("lmodel-dub"));
    if (m_parameter->GetParam("lmodel-dub").size() == 0) {
      // No bounds given: default every model's bound to 0.
      for (size_t i = 0; i < m_parameter->GetParam("lmodel-file").size(); i++)
        LMdub.push_back(0);
    }
    // initialize n-gram order for each factor. populated only by factored lm
    const vector<string> &lmVector = m_parameter->GetParam("lmodel-file");
    //prevent language models from being loaded twice
    map<string, LanguageModel*> languageModelsLoaded;
    for (size_t i = 0; i < lmVector.size(); i++) {
      LanguageModel* lm = NULL;
      if (languageModelsLoaded.find(lmVector[i]) != languageModelsLoaded.end()) {
        // Same spec seen before: share the already-loaded model.
        lm = languageModelsLoaded[lmVector[i]]->Duplicate();
      } else {
        // Spec fields: implementation, factor(s), order, path, [map file].
        vector<string> token = Tokenize(lmVector[i]);
        if (token.size() != 4 && token.size() != 5) {
          UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
          return false;
        }
        // type = implementation, SRI, IRST etc
        LMImplementation lmImplementation = static_cast<LMImplementation>(Scan<int>(token[0]));
        // factorType = 0 = Surface, 1 = POS, 2 = Stem, 3 = Morphology, etc
        vector<FactorType> factorTypes = Tokenize<FactorType>(token[1], ",");
        // nGramOrder = 2 = bigram, 3 = trigram, etc
        size_t nGramOrder = Scan<int>(token[2]);
        string &languageModelFile = token[3];
        if (token.size() == 5) {
          // A fifth field (map file) is only meaningful for IRSTLM.
          if (lmImplementation == IRST)
            languageModelFile += " " + token[4];
          else {
            UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
            return false;
          }
        }
        IFVERBOSE(1)
        PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
        lm = LanguageModelFactory::CreateLanguageModel(
               lmImplementation
               , factorTypes
               , nGramOrder
               , languageModelFile
               , LMdub[i]);
        if (lm == NULL) {
          // The factory returns NULL when the requested implementation was
          // not compiled into this build.
          UserMessage::Add("no LM created. We probably don't have it compiled");
          return false;
        }
        languageModelsLoaded[lmVector[i]] = lm;
      }

      m_languageModel.Add(lm);
      if (m_lmEnableOOVFeature) {
        // With the OOV feature each LM consumes two weights: (score, oov).
        vector<float> weights(2);
        weights[0] = weightAll.at(i * 2);
        weights[1] = weightAll.at(i * 2 + 1);
        SetWeights(lm, weights);
      } else {
        SetWeight(lm, weightAll[i]);
      }
    }
  }
  // flag indicating that language models were loaded,
  // since phrase table loading requires their presence
  m_fLMsLoaded = true;
  IFVERBOSE(1)
  PrintUserTime("Finished loading LanguageModels");
  return true;
}
bool StaticData : : LoadGenerationTables ( )
{
2011-02-24 16:14:42 +03:00
if ( m_parameter - > GetParam ( " generation-file " ) . size ( ) > 0 ) {
const vector < string > & generationVector = m_parameter - > GetParam ( " generation-file " ) ;
const vector < float > & weight = Scan < float > ( m_parameter - > GetParam ( " weight-generation " ) ) ;
IFVERBOSE ( 1 ) {
TRACE_ERR ( " weight-generation: " ) ;
for ( size_t i = 0 ; i < weight . size ( ) ; i + + ) {
TRACE_ERR ( weight [ i ] < < " \t " ) ;
}
TRACE_ERR ( endl ) ;
}
size_t currWeightNum = 0 ;
for ( size_t currDict = 0 ; currDict < generationVector . size ( ) ; currDict + + ) {
vector < string > token = Tokenize ( generationVector [ currDict ] ) ;
vector < FactorType > input = Tokenize < FactorType > ( token [ 0 ] , " , " )
, output = Tokenize < FactorType > ( token [ 1 ] , " , " ) ;
2008-06-11 14:52:57 +04:00
m_maxFactorIdx [ 1 ] = CalcMax ( m_maxFactorIdx [ 1 ] , input , output ) ;
2011-02-24 16:14:42 +03:00
string filePath ;
size_t numFeatures ;
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
numFeatures = Scan < size_t > ( token [ 2 ] ) ;
filePath = token [ 3 ] ;
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
if ( ! FileExists ( filePath ) & & FileExists ( filePath + " .gz " ) ) {
filePath + = " .gz " ;
}
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
VERBOSE ( 1 , filePath < < endl ) ;
2008-06-11 14:52:57 +04:00
2011-08-19 20:09:36 +04:00
m_generationDictionary . push_back ( new GenerationDictionary ( numFeatures , input , output ) ) ;
2011-11-18 16:07:41 +04:00
CHECK ( m_generationDictionary . back ( ) & & " could not create GenerationDictionary " ) ;
2011-02-24 16:14:42 +03:00
if ( ! m_generationDictionary . back ( ) - > Load ( filePath , Output ) ) {
delete m_generationDictionary . back ( ) ;
return false ;
}
2010-10-07 02:06:49 +04:00
vector < float > gdWeights ;
2011-02-24 16:14:42 +03:00
for ( size_t i = 0 ; i < numFeatures ; i + + ) {
2011-11-18 16:07:41 +04:00
CHECK ( currWeightNum < weight . size ( ) ) ;
2011-08-19 20:09:36 +04:00
gdWeights . push_back ( weight [ currWeightNum + + ] ) ;
2011-02-24 16:14:42 +03:00
}
2010-10-07 02:06:49 +04:00
SetWeights ( m_generationDictionary . back ( ) , gdWeights ) ;
2011-02-24 16:14:42 +03:00
}
if ( currWeightNum ! = weight . size ( ) ) {
TRACE_ERR ( " [WARNING] config file has " < < weight . size ( ) < < " generation weights listed, but the configuration for generation files indicates there should be " < < currWeightNum < < " ! \n " ) ;
}
}
return true ;
2008-06-11 14:52:57 +04:00
}
2010-08-10 17:12:00 +04:00
/* Doesn't load phrase tables any more. Just creates the features. */
2008-06-11 14:52:57 +04:00
bool StaticData : : LoadPhraseTables ( )
{
2011-02-24 16:14:42 +03:00
VERBOSE ( 2 , " Creating phrase table features " < < endl ) ;
// language models must be loaded prior to loading phrase tables
2011-11-18 16:07:41 +04:00
CHECK ( m_fLMsLoaded ) ;
2011-02-24 16:14:42 +03:00
// load phrase translation tables
if ( m_parameter - > GetParam ( " ttable-file " ) . size ( ) > 0 ) {
// weights
vector < float > weightAll = Scan < float > ( m_parameter - > GetParam ( " weight-t " ) ) ;
const vector < string > & translationVector = m_parameter - > GetParam ( " ttable-file " ) ;
vector < size_t > maxTargetPhrase = Scan < size_t > ( m_parameter - > GetParam ( " ttable-limit " ) ) ;
if ( maxTargetPhrase . size ( ) = = 1 & & translationVector . size ( ) > 1 ) {
VERBOSE ( 1 , " Using uniform ttable-limit of " < < maxTargetPhrase [ 0 ] < < " for all translation tables. " < < endl ) ;
for ( size_t i = 1 ; i < translationVector . size ( ) ; i + + )
maxTargetPhrase . push_back ( maxTargetPhrase [ 0 ] ) ;
} else if ( maxTargetPhrase . size ( ) ! = 1 & & maxTargetPhrase . size ( ) < translationVector . size ( ) ) {
stringstream strme ;
strme < < " You specified " < < translationVector . size ( ) < < " translation tables, but only " < < maxTargetPhrase . size ( ) < < " ttable-limits. " ;
UserMessage : : Add ( strme . str ( ) ) ;
return false ;
}
size_t index = 0 ;
size_t weightAllOffset = 0 ;
bool oldFileFormat = false ;
for ( size_t currDict = 0 ; currDict < translationVector . size ( ) ; currDict + + ) {
vector < string > token = Tokenize ( translationVector [ currDict ] ) ;
if ( currDict = = 0 & & token . size ( ) = = 4 ) {
VERBOSE ( 1 , " Warning: Phrase table specification in old 4-field format. Assuming binary phrase tables (type 1)! " < < endl ) ;
oldFileFormat = true ;
}
if ( ( ! oldFileFormat & & token . size ( ) < 5 ) | | ( oldFileFormat & & token . size ( ) ! = 4 ) ) {
UserMessage : : Add ( " invalid phrase table specification " ) ;
return false ;
}
PhraseTableImplementation implementation = ( PhraseTableImplementation ) Scan < int > ( token [ 0 ] ) ;
if ( oldFileFormat ) {
token . push_back ( token [ 3 ] ) ;
token [ 3 ] = token [ 2 ] ;
token [ 2 ] = token [ 1 ] ;
token [ 1 ] = token [ 0 ] ;
token [ 0 ] = " 1 " ;
implementation = Binary ;
} else
implementation = ( PhraseTableImplementation ) Scan < int > ( token [ 0 ] ) ;
2011-11-18 16:07:41 +04:00
CHECK ( token . size ( ) > = 5 ) ;
2011-02-24 16:14:42 +03:00
//characteristics of the phrase table
vector < FactorType > input = Tokenize < FactorType > ( token [ 1 ] , " , " )
, output = Tokenize < FactorType > ( token [ 2 ] , " , " ) ;
m_maxFactorIdx [ 0 ] = CalcMax ( m_maxFactorIdx [ 0 ] , input ) ;
m_maxFactorIdx [ 1 ] = CalcMax ( m_maxFactorIdx [ 1 ] , output ) ;
2008-06-11 14:52:57 +04:00
m_maxNumFactors = std : : max ( m_maxFactorIdx [ 0 ] , m_maxFactorIdx [ 1 ] ) + 1 ;
2011-02-24 16:14:42 +03:00
size_t numScoreComponent = Scan < size_t > ( token [ 3 ] ) ;
string filePath = token [ 4 ] ;
2011-11-18 16:07:41 +04:00
CHECK ( weightAll . size ( ) > = weightAllOffset + numScoreComponent ) ;
2011-02-24 16:14:42 +03:00
// weights for this phrase dictionary
// first InputScores (if any), then translation scores
vector < float > weight ;
if ( currDict = = 0 & & ( m_inputType = = ConfusionNetworkInput | | m_inputType = = WordLatticeInput ) ) {
// TODO. find what the assumptions made by confusion network about phrase table output which makes
// it only work with binrary file. This is a hack
m_numInputScores = m_parameter - > GetParam ( " weight-i " ) . size ( ) ;
2011-10-30 09:51:08 +04:00
if ( implementation = = Binary )
{
for ( unsigned k = 0 ; k < m_numInputScores ; + + k )
weight . push_back ( Scan < float > ( m_parameter - > GetParam ( " weight-i " ) [ k ] ) ) ;
}
2011-02-24 16:14:42 +03:00
if ( m_parameter - > GetParam ( " link-param-count " ) . size ( ) )
m_numLinkParams = Scan < size_t > ( m_parameter - > GetParam ( " link-param-count " ) [ 0 ] ) ;
//print some info about this interaction:
2011-10-30 09:51:08 +04:00
if ( implementation = = Binary ) {
if ( m_numLinkParams = = m_numInputScores ) {
VERBOSE ( 1 , " specified equal numbers of link parameters and insertion weights, not using non-epsilon 'real' word link count. \n " ) ;
} else if ( ( m_numLinkParams + 1 ) = = m_numInputScores ) {
VERBOSE ( 1 , " WARN: " < < m_numInputScores < < " insertion weights found and only " < < m_numLinkParams < < " link parameters specified, applying non-epsilon 'real' word link count for last feature weight. \n " ) ;
} else {
stringstream strme ;
strme < < " You specified " < < m_numInputScores
< < " input weights (weight-i), but you specified " < < m_numLinkParams < < " link parameters (link-param-count)! " ;
UserMessage : : Add ( strme . str ( ) ) ;
return false ;
}
2011-02-24 16:14:42 +03:00
}
2011-10-30 09:51:08 +04:00
2011-02-24 16:14:42 +03:00
}
if ( ! m_inputType ) {
m_numInputScores = 0 ;
}
//this number changes depending on what phrase table we're talking about: only 0 has the weights on it
2011-10-28 16:12:20 +04:00
size_t tableInputScores = ( currDict = = 0 & & implementation = = Binary ) ? m_numInputScores : 0 ;
2011-02-24 16:14:42 +03:00
for ( size_t currScore = 0 ; currScore < numScoreComponent ; currScore + + )
weight . push_back ( weightAll [ weightAllOffset + currScore ] ) ;
2011-10-28 16:12:20 +04:00
2011-02-24 16:14:42 +03:00
if ( weight . size ( ) - tableInputScores ! = numScoreComponent ) {
stringstream strme ;
strme < < " Your phrase table has " < < numScoreComponent
< < " scores, but you specified " < < ( weight . size ( ) - tableInputScores ) < < " weights! " ;
UserMessage : : Add ( strme . str ( ) ) ;
return false ;
}
weightAllOffset + = numScoreComponent ;
numScoreComponent + = tableInputScores ;
string targetPath , alignmentsFile ;
if ( implementation = = SuffixArray ) {
targetPath = token [ 5 ] ;
alignmentsFile = token [ 6 ] ;
}
2011-11-18 16:07:41 +04:00
CHECK ( numScoreComponent = = weight . size ( ) ) ;
2011-02-24 16:14:42 +03:00
//This is needed for regression testing, but the phrase table
//might not really be loading here
IFVERBOSE ( 1 )
PrintUserTime ( string ( " Start loading PhraseTable " ) + filePath ) ;
VERBOSE ( 1 , " filePath: " < < filePath < < endl ) ;
2011-09-20 19:32:26 +04:00
//optional create sparse phrase feature
SparsePhraseDictionaryFeature * spdf = NULL ;
if ( token . size ( ) > = 6 & & token [ 5 ] = = " sparse " ) {
spdf = new SparsePhraseDictionaryFeature ( ) ;
}
m_sparsePhraseDictionary . push_back ( spdf ) ;
2010-10-07 02:06:49 +04:00
PhraseDictionaryFeature * pdf = new PhraseDictionaryFeature (
2011-02-24 16:14:42 +03:00
implementation
2011-09-20 19:32:26 +04:00
, spdf
2011-02-24 16:14:42 +03:00
, numScoreComponent
, ( currDict = = 0 ? m_numInputScores : 0 )
, input
, output
, filePath
, weight
, maxTargetPhrase [ index ]
, targetPath , alignmentsFile ) ;
m_phraseDictionary . push_back ( pdf ) ;
2010-10-07 02:06:49 +04:00
SetWeights ( m_phraseDictionary . back ( ) , weight ) ;
2011-02-24 16:14:42 +03:00
index + + ;
}
}
IFVERBOSE ( 1 )
PrintUserTime ( " Finished loading phrase tables " ) ;
return true ;
2008-06-11 14:52:57 +04:00
}
2010-04-08 21:16:10 +04:00
void StaticData : : LoadNonTerminals ( )
{
2011-02-24 16:14:42 +03:00
string defaultNonTerminals ;
if ( m_parameter - > GetParam ( " non-terminals " ) . size ( ) = = 0 ) {
defaultNonTerminals = " X " ;
} else {
vector < std : : string > tokens = Tokenize ( m_parameter - > GetParam ( " non-terminals " ) [ 0 ] ) ;
defaultNonTerminals = tokens [ 0 ] ;
}
FactorCollection & factorCollection = FactorCollection : : Instance ( ) ;
m_inputDefaultNonTerminal . SetIsNonTerminal ( true ) ;
const Factor * sourceFactor = factorCollection . AddFactor ( Input , 0 , defaultNonTerminals ) ;
m_inputDefaultNonTerminal . SetFactor ( 0 , sourceFactor ) ;
m_outputDefaultNonTerminal . SetIsNonTerminal ( true ) ;
const Factor * targetFactor = factorCollection . AddFactor ( Output , 0 , defaultNonTerminals ) ;
m_outputDefaultNonTerminal . SetFactor ( 0 , targetFactor ) ;
// for unknwon words
if ( m_parameter - > GetParam ( " unknown-lhs " ) . size ( ) = = 0 ) {
UnknownLHSEntry entry ( defaultNonTerminals , 0.0f ) ;
m_unknownLHS . push_back ( entry ) ;
} else {
const string & filePath = m_parameter - > GetParam ( " unknown-lhs " ) [ 0 ] ;
InputFileStream inStream ( filePath ) ;
string line ;
while ( getline ( inStream , line ) ) {
vector < string > tokens = Tokenize ( line ) ;
2011-11-18 16:07:41 +04:00
CHECK ( tokens . size ( ) = = 2 ) ;
2011-02-24 16:14:42 +03:00
UnknownLHSEntry entry ( tokens [ 0 ] , Scan < float > ( tokens [ 1 ] ) ) ;
m_unknownLHS . push_back ( entry ) ;
}
}
2010-04-08 21:16:10 +04:00
}
2011-02-24 16:14:42 +03:00
2010-04-08 21:16:10 +04:00
void StaticData : : LoadChartDecodingParameters ( )
{
2011-02-24 16:14:42 +03:00
LoadNonTerminals ( ) ;
// source label overlap
if ( m_parameter - > GetParam ( " source-label-overlap " ) . size ( ) > 0 ) {
m_sourceLabelOverlap = ( SourceLabelOverlap ) Scan < int > ( m_parameter - > GetParam ( " source-label-overlap " ) [ 0 ] ) ;
} else {
m_sourceLabelOverlap = SourceLabelOverlapAdd ;
}
m_ruleLimit = ( m_parameter - > GetParam ( " rule-limit " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " rule-limit " ) [ 0 ] ) : DEFAULT_MAX_TRANS_OPT_SIZE ;
2010-04-08 21:16:10 +04:00
}
2011-02-24 16:14:42 +03:00
2010-04-08 21:16:10 +04:00
void StaticData : : LoadPhraseBasedParameters ( )
{
2011-02-24 16:14:42 +03:00
const vector < string > distortionWeights = m_parameter - > GetParam ( " weight-d " ) ;
2010-08-10 17:12:00 +04:00
size_t distortionWeightCount = distortionWeights . size ( ) ;
2011-02-24 16:14:42 +03:00
//if there's a lex-reordering model, and no separate weight set, then
2010-08-10 17:12:00 +04:00
//take just one of these weights for linear distortion
if ( ! m_parameter - > GetParam ( " weight-lr " ) . size ( ) & & m_parameter - > GetParam ( " distortion-file " ) . size ( ) ) {
distortionWeightCount = 1 ;
}
for ( size_t i = 0 ; i < distortionWeightCount ; + + i ) {
float weightDistortion = Scan < float > ( distortionWeights [ i ] ) ;
2010-10-07 02:06:49 +04:00
m_distortionScoreProducers . push_back ( new DistortionScoreProducer ( ) ) ;
SetWeight ( m_distortionScoreProducers . back ( ) , weightDistortion ) ;
2010-08-10 17:12:00 +04:00
}
2010-04-08 21:16:10 +04:00
}
2010-08-10 17:12:00 +04:00
2011-02-24 16:14:42 +03:00
bool StaticData::LoadDecodeGraphs()
{
  // Build the decode graphs (chains of decode steps) from the "mapping"
  // parameter.  Each mapping entry is either "<type> <index>" (all steps in
  // graph 0) or "<graphInd> <type> <index>", where <type> is "T" (translation)
  // or "G" (generation) and <index> selects a previously loaded dictionary.
  const vector<string> &mappingVector = m_parameter->GetParam("mapping");
  const vector<size_t> &maxChartSpans = Scan<size_t>(m_parameter->GetParam("max-chart-span"));

  DecodeStep *prev = 0;
  size_t prevDecodeGraphInd = 0;
  for (size_t i = 0; i < mappingVector.size(); i++) {
    vector<string> token = Tokenize(mappingVector[i]);
    size_t decodeGraphInd;
    DecodeType decodeType;
    size_t index;
    if (token.size() == 2) {
      // short form: everything belongs to graph 0
      decodeGraphInd = 0;
      decodeType = token[0] == "T" ? Translate : Generate;
      index = Scan<size_t>(token[1]);
    } else if (token.size() == 3) {
      // For specifying multiple translation model
      decodeGraphInd = Scan<size_t>(token[0]);
      // the vectorList index can only increment by one
      CHECK(decodeGraphInd == prevDecodeGraphInd || decodeGraphInd == prevDecodeGraphInd + 1);
      if (decodeGraphInd > prevDecodeGraphInd) {
        // a new graph starts a fresh chain of steps
        prev = NULL;
      }
      decodeType = token[1] == "T" ? Translate : Generate;
      index = Scan<size_t>(token[2]);
    } else {
      UserMessage::Add("Malformed mapping!");
      CHECK(false);
    }

    // Instantiate the step, validating the dictionary index first.
    DecodeStep *decodeStep = NULL;
    switch (decodeType) {
    case Translate:
      if (index >= m_phraseDictionary.size()) {
        stringstream strme;
        strme << "No phrase dictionary with index "
              << index << " available!";
        UserMessage::Add(strme.str());
        CHECK(false);
      }
      decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev);
      break;
    case Generate:
      if (index >= m_generationDictionary.size()) {
        stringstream strme;
        strme << "No generation dictionary with index "
              << index << " available!";
        UserMessage::Add(strme.str());
        CHECK(false);
      }
      decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev);
      break;
    case InsertNullFertilityWord:
      CHECK(!"Please implement NullFertilityInsertion.");
      break;
    }

    CHECK(decodeStep);
    if (m_decodeGraphs.size() < decodeGraphInd + 1) {
      // first step of a new graph: create its container; chart decoding
      // graphs additionally carry a maximum chart span
      DecodeGraph *decodeGraph;
      if (m_searchAlgorithm == ChartDecoding) {
        size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
        decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
      } else {
        decodeGraph = new DecodeGraph(m_decodeGraphs.size());
      }
      m_decodeGraphs.push_back(decodeGraph); // TODO max chart span
    }
    m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
    prev = decodeStep;
    prevDecodeGraphInd = decodeGraphInd;
  }

  // set maximum n-gram size for backoff approach to decoding paths
  // default is always use subsequent paths (value = 0)
  for (size_t i = 0; i < m_decodeGraphs.size(); i++) {
    m_decodeGraphBackoff.push_back(0);
  }
  // if specified, record maximum unseen n-gram size per graph
  const vector<string> &backoffVector = m_parameter->GetParam("decoding-graph-backoff");
  for (size_t i = 0; i < m_decodeGraphs.size() && i < backoffVector.size(); i++) {
    m_decodeGraphBackoff[i] = Scan<size_t>(backoffVector[i]);
  }
  return true;
}
2011-08-19 20:09:36 +04:00
bool StaticData : : LoadReferences ( )
{
2011-10-20 17:32:05 +04:00
vector < string > bleuWeightStr = m_parameter - > GetParam ( " weight-bl " ) ;
2010-09-14 20:25:33 +04:00
vector < string > referenceFiles = m_parameter - > GetParam ( " references " ) ;
2011-08-19 20:09:36 +04:00
if ( ( ! referenceFiles . size ( ) & & bleuWeightStr . size ( ) ) | | ( referenceFiles . size ( ) & & ! bleuWeightStr . size ( ) ) ) {
2010-09-14 20:25:33 +04:00
UserMessage : : Add ( " You cannot use the bleu feature without references, and vice-versa " ) ;
return false ;
}
2010-09-14 20:55:33 +04:00
if ( ! referenceFiles . size ( ) ) {
return true ;
}
2010-09-14 20:25:33 +04:00
if ( bleuWeightStr . size ( ) > 1 ) {
UserMessage : : Add ( " Can only specify one weight for the bleu feature " ) ;
return false ;
}
2012-04-03 17:39:45 +04:00
2010-09-14 20:25:33 +04:00
float bleuWeight = Scan < float > ( bleuWeightStr [ 0 ] ) ;
m_bleuScoreFeature = new BleuScoreFeature ( ) ;
2010-10-07 02:06:49 +04:00
SetWeight ( m_bleuScoreFeature , bleuWeight ) ;
2011-08-19 20:09:36 +04:00
2012-04-03 17:39:45 +04:00
cerr < < " Loading reference file " < < referenceFiles [ 0 ] < < endl ;
2010-09-14 20:25:33 +04:00
vector < vector < string > > references ( referenceFiles . size ( ) ) ;
for ( size_t i = 0 ; i < referenceFiles . size ( ) ; + + i ) {
ifstream in ( referenceFiles [ i ] . c_str ( ) ) ;
if ( ! in ) {
stringstream strme ;
strme < < " Unable to load references from " < < referenceFiles [ i ] ;
UserMessage : : Add ( strme . str ( ) ) ;
return false ;
}
string line ;
while ( getline ( in , line ) ) {
2012-05-29 02:49:39 +04:00
/* if (GetSearchAlgorithm() == ChartDecoding) {
stringstream tmp ;
tmp < < " <s> " < < line < < " </s> " ;
line = tmp . str ( ) ;
} */
2012-01-12 20:30:50 +04:00
references [ i ] . push_back ( line ) ;
2010-09-14 20:25:33 +04:00
}
if ( i > 0 ) {
if ( references [ i ] . size ( ) ! = references [ i - 1 ] . size ( ) ) {
UserMessage : : Add ( " Reference files are of different lengths " ) ;
return false ;
}
}
in . close ( ) ;
}
2010-09-16 16:49:57 +04:00
//Set the references in the bleu feature
m_bleuScoreFeature - > LoadReferences ( references ) ;
2010-09-14 20:25:33 +04:00
return true ;
}
2010-10-15 19:19:17 +04:00
bool StaticData : : LoadDiscrimLMFeature ( )
2010-09-17 17:36:03 +04:00
{
2011-12-07 01:33:23 +04:00
// only load if specified
2012-03-02 22:35:02 +04:00
const vector < string > & wordFile = m_parameter - > GetParam ( " dlm-model " ) ;
2011-08-19 20:09:36 +04:00
if ( wordFile . empty ( ) ) {
return true ;
2010-10-15 19:19:17 +04:00
}
2011-12-07 01:33:23 +04:00
cerr < < " Loading " < < wordFile . size ( ) < < " discriminative language model(s).. " < < endl ;
2011-11-24 23:27:12 +04:00
// if this weight is specified, the sparse DLM weights will be scaled with an additional weight
vector < string > dlmWeightStr = m_parameter - > GetParam ( " weight-dlm " ) ;
vector < float > dlmWeights ;
for ( size_t i = 0 ; i < dlmWeightStr . size ( ) ; + + i )
dlmWeights . push_back ( Scan < float > ( dlmWeightStr [ i ] ) ) ;
2010-10-15 19:19:17 +04:00
2011-11-22 16:15:15 +04:00
for ( size_t i = 0 ; i < wordFile . size ( ) ; + + i ) {
vector < string > tokens = Tokenize ( wordFile [ i ] ) ;
if ( tokens . size ( ) ! = 4 ) {
UserMessage : : Add ( " Format of discriminative language model parameter is <order> <factor> <include-lower-ngrams> <filename> " ) ;
return false ;
}
size_t order = Scan < size_t > ( tokens [ 0 ] ) ;
FactorType factorId = Scan < size_t > ( tokens [ 1 ] ) ;
bool include_lower_ngrams = Scan < bool > ( tokens [ 2 ] ) ;
string filename = tokens [ 3 ] ;
if ( order = = 2 & & ! include_lower_ngrams ) { // TODO: remove TargetBigramFeature ?
m_targetBigramFeature = new TargetBigramFeature ( factorId ) ;
2011-11-24 23:27:12 +04:00
cerr < < " loading vocab from " < < filename < < endl ;
2011-11-22 16:15:15 +04:00
if ( ! m_targetBigramFeature - > Load ( filename ) ) {
UserMessage : : Add ( " Unable to load word list from file " + filename ) ;
return false ;
}
}
else {
2011-12-07 15:19:50 +04:00
if ( m_searchAlgorithm = = ChartDecoding & & ! include_lower_ngrams ) {
UserMessage : : Add ( " Excluding lower order DLM ngrams is currently not supported for chart decoding. " ) ;
return false ;
}
2011-11-22 16:15:15 +04:00
m_targetNgramFeatures . push_back ( new TargetNgramFeature ( factorId , order , include_lower_ngrams ) ) ;
2011-11-24 23:27:12 +04:00
if ( i < dlmWeights . size ( ) )
m_targetNgramFeatures [ i ] - > SetSparseProducerWeight ( dlmWeights [ i ] ) ;
cerr < < " loading vocab from " < < filename < < endl ;
2011-11-22 16:15:15 +04:00
if ( ! m_targetNgramFeatures [ i ] - > Load ( filename ) ) {
UserMessage : : Add ( " Unable to load word list from file " + filename ) ;
return false ;
}
}
2011-08-19 20:09:36 +04:00
}
2010-09-17 17:36:03 +04:00
2011-08-19 20:09:36 +04:00
return true ;
2010-09-17 17:36:03 +04:00
}
2008-06-11 14:52:57 +04:00
2011-08-19 20:09:36 +04:00
bool StaticData : : LoadPhraseBoundaryFeature ( )
2011-05-11 02:02:25 +04:00
{
2012-03-15 04:32:27 +04:00
const vector < float > & weight = Scan < float > ( m_parameter - > GetParam ( " weight-pb " ) ) ;
if ( weight . size ( ) > 1 ) {
std : : cerr < < " only one sparse producer weight allowed for the phrase boundary feature " < < std : : endl ;
return false ;
}
2011-08-19 20:09:36 +04:00
const vector < string > & phraseBoundarySourceFactors =
2011-05-11 02:02:25 +04:00
m_parameter - > GetParam ( " phrase-boundary-source-feature " ) ;
2011-08-19 20:09:36 +04:00
const vector < string > & phraseBoundaryTargetFactors =
2011-05-11 02:02:25 +04:00
m_parameter - > GetParam ( " phrase-boundary-target-feature " ) ;
if ( phraseBoundarySourceFactors . size ( ) = = 0 & & phraseBoundaryTargetFactors . size ( ) = = 0 ) {
return true ;
}
if ( phraseBoundarySourceFactors . size ( ) > 1 ) {
UserMessage : : Add ( " Need to specify comma separated list of source factors for phrase boundary " ) ;
return false ;
}
if ( phraseBoundaryTargetFactors . size ( ) > 1 ) {
UserMessage : : Add ( " Need to specify comma separated list of target factors for phrase boundary " ) ;
return false ;
}
FactorList sourceFactors ;
FactorList targetFactors ;
if ( phraseBoundarySourceFactors . size ( ) ) {
sourceFactors = Tokenize < FactorType > ( phraseBoundarySourceFactors [ 0 ] , " , " ) ;
}
if ( phraseBoundaryTargetFactors . size ( ) ) {
targetFactors = Tokenize < FactorType > ( phraseBoundaryTargetFactors [ 0 ] , " , " ) ;
}
//cerr << "source "; for (size_t i = 0; i < sourceFactors.size(); ++i) cerr << sourceFactors[i] << " "; cerr << endl;
//cerr << "target "; for (size_t i = 0; i < targetFactors.size(); ++i) cerr << targetFactors[i] << " "; cerr << endl;
m_phraseBoundaryFeature = new PhraseBoundaryFeature ( sourceFactors , targetFactors ) ;
2012-03-15 04:52:35 +04:00
if ( weight . size ( ) > 0 )
m_phraseBoundaryFeature - > SetSparseProducerWeight ( weight [ 0 ] ) ;
2011-05-11 02:02:25 +04:00
return true ;
}
2011-08-19 20:09:36 +04:00
bool StaticData::LoadPhrasePairFeature()
{
  // Optional sparse phrase-pair feature, configured via --phrase-pair-feature;
  // "weight-pp" optionally supplies one global scaling weight.
  const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-pp"));
  if (weight.size() > 1) {
    std::cerr << "Only one sparse producer weight allowed for the phrase pair feature" << std::endl;
    return false;
  }

  const vector<string> &phrasePairFactors = m_parameter->GetParam("phrase-pair-feature");
  if (phrasePairFactors.size() == 0) return true; // feature not requested
  if (phrasePairFactors.size() != 1) {
    UserMessage::Add("Can only have one phrase-pair-feature");
    return false;
  }

  vector<string> tokens = Tokenize(phrasePairFactors[0]);
  // if (! (tokens.size() >= 1 && tokens.size() <= 4)) {
  if (!(tokens.size() >= 1 && tokens.size() <= 5)) {
    UserMessage::Add("Format for phrase pair feature: --phrase-pair-feature <factor-src>-<factor-tgt> "
                     "[simple source-trigger] [ignore-punctuation]");
    return false;
  }

  // NOTE(review): in the two-token case the factor pair is split on a space,
  // but tokens[0] itself came from whitespace tokenization and so cannot
  // contain a space; factors[1] below would then read out of bounds.
  // Confirm the intended config syntax for this branch.
  vector<string> factors;
  if (tokens.size() == 2)
    factors = Tokenize(tokens[0], " ");
  else
    factors = Tokenize(tokens[0], "-");

  size_t sourceFactorId = Scan<size_t>(factors[0]);
  size_t targetFactorId = Scan<size_t>(factors[1]);

  // flags default to the plain ("simple") variant of the feature
  bool simple = true, sourceContext = false, ignorePunctuation = false;
  if (tokens.size() >= 3) {
    simple = Scan<size_t>(tokens[1]);
    sourceContext = Scan<size_t>(tokens[2]);
  }
  if (tokens.size() == 4)
    ignorePunctuation = Scan<size_t>(tokens[3]);

  // temporary
  string filePath = "";
  if (tokens.size() == 5)
    filePath = Scan<string>(tokens[4]);

  m_phrasePairFeature = new PhrasePairFeature(sourceFactorId, targetFactorId, simple, sourceContext,
                                              ignorePunctuation, filePath);
  if (weight.size() > 0)
    m_phrasePairFeature->SetSparseProducerWeight(weight[0]);

  return true;
}
2011-08-06 18:10:43 +04:00
bool StaticData : : LoadPhraseLengthFeature ( )
{
if ( m_parameter - > isParamSpecified ( " phrase-length-feature " ) ) {
m_phraseLengthFeature = new PhraseLengthFeature ( ) ;
}
return true ;
}
2011-08-13 04:25:23 +04:00
bool StaticData : : LoadTargetWordInsertionFeature ( )
{
2011-08-19 20:09:36 +04:00
const vector < string > & parameters = m_parameter - > GetParam ( " target-word-insertion-feature " ) ;
if ( parameters . empty ( ) )
return true ;
2011-08-13 04:25:23 +04:00
2011-08-19 20:09:36 +04:00
if ( parameters . size ( ) ! = 1 ) {
UserMessage : : Add ( " Can only have one target-word-insertion-feature " ) ;
return false ;
}
2011-08-13 04:25:23 +04:00
2011-08-19 20:09:36 +04:00
vector < string > tokens = Tokenize ( parameters [ 0 ] ) ;
2011-08-13 04:25:23 +04:00
if ( tokens . size ( ) ! = 1 & & tokens . size ( ) ! = 2 ) {
2011-08-13 05:39:35 +04:00
UserMessage : : Add ( " Format of target word insertion feature parameter is: --target-word-insertion-feature <factor> [filename] " ) ;
2011-08-13 04:25:23 +04:00
return false ;
}
2012-05-28 10:03:45 +04:00
if ( ! m_UseAlignmentInfo & & GetSearchAlgorithm ( ) ! = ChartDecoding ) {
2011-08-13 04:25:23 +04:00
UserMessage : : Add ( " Target word insertion feature needs word alignments in phrase table. " ) ;
return false ;
}
2011-08-13 05:39:35 +04:00
// set factor
FactorType factorId = Scan < size_t > ( tokens [ 0 ] ) ;
2011-08-13 04:25:23 +04:00
m_targetWordInsertionFeature = new TargetWordInsertionFeature ( factorId ) ;
2011-08-13 05:39:35 +04:00
// load word list for restricted feature set
if ( tokens . size ( ) = = 2 ) {
string filename = tokens [ 1 ] ;
cerr < < " loading target word insertion word list from " < < filename < < endl ;
2011-08-19 20:09:36 +04:00
if ( ! m_targetWordInsertionFeature - > Load ( filename ) ) {
UserMessage : : Add ( " Unable to load word list for target word insertion feature from file " + filename ) ;
return false ;
2011-08-13 05:39:35 +04:00
}
2011-08-19 20:09:36 +04:00
}
2011-08-13 05:39:35 +04:00
return true ;
}
bool StaticData : : LoadSourceWordDeletionFeature ( )
{
2011-08-19 20:09:36 +04:00
const vector < string > & parameters = m_parameter - > GetParam ( " source-word-deletion-feature " ) ;
if ( parameters . empty ( ) )
return true ;
2011-08-13 05:39:35 +04:00
2011-08-19 20:09:36 +04:00
if ( parameters . size ( ) ! = 1 ) {
UserMessage : : Add ( " Can only have one source-word-deletion-feature " ) ;
return false ;
}
2011-08-13 05:39:35 +04:00
2011-08-19 20:09:36 +04:00
vector < string > tokens = Tokenize ( parameters [ 0 ] ) ;
2011-08-13 05:39:35 +04:00
if ( tokens . size ( ) ! = 1 & & tokens . size ( ) ! = 2 ) {
UserMessage : : Add ( " Format of source word deletion feature parameter is: --source-word-deletion-feature <factor> [filename] " ) ;
return false ;
}
2012-05-28 10:03:45 +04:00
if ( ! m_UseAlignmentInfo & & GetSearchAlgorithm ( ) ! = ChartDecoding ) {
2011-08-13 05:39:35 +04:00
UserMessage : : Add ( " Source word deletion feature needs word alignments in phrase table. " ) ;
return false ;
}
// set factor
FactorType factorId = Scan < size_t > ( tokens [ 0 ] ) ;
m_sourceWordDeletionFeature = new SourceWordDeletionFeature ( factorId ) ;
// load word list for restricted feature set
if ( tokens . size ( ) = = 2 ) {
string filename = tokens [ 1 ] ;
cerr < < " loading source word deletion word list from " < < filename < < endl ;
2011-08-19 20:09:36 +04:00
if ( ! m_sourceWordDeletionFeature - > Load ( filename ) ) {
UserMessage : : Add ( " Unable to load word list for source word deletion feature from file " + filename ) ;
return false ;
2011-08-13 05:39:35 +04:00
}
2011-08-19 20:09:36 +04:00
}
2011-08-13 05:39:35 +04:00
2011-08-13 04:25:23 +04:00
return true ;
}
2011-08-13 06:40:54 +04:00
bool StaticData : : LoadWordTranslationFeature ( )
{
2012-03-15 04:32:27 +04:00
const vector < float > & weight = Scan < float > ( m_parameter - > GetParam ( " weight-wt " ) ) ;
if ( weight . size ( ) > 1 ) {
2012-03-19 06:45:59 +04:00
std : : cerr < < " Only one sparse producer weight allowed for the word translation feature " < < std : : endl ;
2012-03-15 04:32:27 +04:00
return false ;
}
2011-08-19 20:09:36 +04:00
const vector < string > & parameters = m_parameter - > GetParam ( " word-translation-feature " ) ;
if ( parameters . empty ( ) )
return true ;
2011-08-13 06:40:54 +04:00
2011-08-19 20:09:36 +04:00
if ( parameters . size ( ) ! = 1 ) {
UserMessage : : Add ( " Can only have one word-translation-feature " ) ;
return false ;
}
2011-08-13 06:40:54 +04:00
2011-08-19 20:09:36 +04:00
vector < string > tokens = Tokenize ( parameters [ 0 ] ) ;
2012-03-20 17:45:25 +04:00
if ( tokens . size ( ) ! = 1 & & tokens . size ( ) ! = 4 & & tokens . size ( ) ! = 5 & & tokens . size ( ) ! = 7 ) {
2012-03-07 18:04:25 +04:00
UserMessage : : Add ( " Format of word translation feature parameter is: --word-translation-feature <factor-src>-<factor-tgt> "
2012-03-20 17:45:25 +04:00
" [simple source-trigger target-trigger] [ignore-punctuation] [filename-src filename-tgt] " ) ;
2011-08-13 06:40:54 +04:00
return false ;
}
2012-05-28 10:03:45 +04:00
if ( ! m_UseAlignmentInfo & & GetSearchAlgorithm ( ) ! = ChartDecoding ) {
2011-08-13 06:40:54 +04:00
UserMessage : : Add ( " Word translation feature needs word alignments in phrase table. " ) ;
return false ;
}
// set factor
2012-03-07 18:04:25 +04:00
vector < string > factors = Tokenize ( tokens [ 0 ] , " - " ) ;
FactorType factorIdSource = Scan < size_t > ( factors [ 0 ] ) ;
FactorType factorIdTarget = Scan < size_t > ( factors [ 1 ] ) ;
2012-03-20 17:45:25 +04:00
2012-04-29 08:37:48 +04:00
bool simple = true , sourceTrigger = false , targetTrigger = false , ignorePunctuation = false ;
2012-03-07 18:04:25 +04:00
if ( tokens . size ( ) > = 4 ) {
simple = Scan < size_t > ( tokens [ 1 ] ) ;
sourceTrigger = Scan < size_t > ( tokens [ 2 ] ) ;
targetTrigger = Scan < size_t > ( tokens [ 3 ] ) ;
}
2012-03-20 17:45:25 +04:00
if ( tokens . size ( ) > = 5 ) {
ignorePunctuation = Scan < size_t > ( tokens [ 4 ] ) ;
}
2012-02-27 00:14:49 +04:00
2012-03-07 18:04:25 +04:00
m_wordTranslationFeature = new WordTranslationFeature ( factorIdSource , factorIdTarget , simple ,
2012-03-20 17:45:25 +04:00
sourceTrigger , targetTrigger , ignorePunctuation ) ;
2012-03-15 04:52:35 +04:00
if ( weight . size ( ) > 0 )
m_wordTranslationFeature - > SetSparseProducerWeight ( weight [ 0 ] ) ;
2011-08-13 06:40:54 +04:00
// load word list for restricted feature set
2012-03-20 17:45:25 +04:00
if ( tokens . size ( ) = = 7 ) {
2012-03-07 18:04:25 +04:00
string filenameSource = tokens [ 5 ] ;
string filenameTarget = tokens [ 6 ] ;
2011-08-13 06:40:54 +04:00
cerr < < " loading word translation word lists from " < < filenameSource < < " and " < < filenameTarget < < endl ;
2011-08-19 20:09:36 +04:00
if ( ! m_wordTranslationFeature - > Load ( filenameSource , filenameTarget ) ) {
UserMessage : : Add ( " Unable to load word lists for word translation feature from files " + filenameSource + " and " + filenameTarget ) ;
return false ;
2011-08-13 06:40:54 +04:00
}
2011-08-19 20:09:36 +04:00
}
2011-08-13 06:40:54 +04:00
return true ;
}
2008-10-14 23:25:18 +04:00
const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const
{
  // Cache entries are keyed on (decode-graph position, source phrase).
  std::pair<size_t, Phrase> key(decodeGraph.GetPosition(), sourcePhrase);
#ifdef WITH_THREADS
  boost::mutex::scoped_lock lock(m_transOptCacheMutex);
#endif
  std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator hit
    = m_transOptCache.find(key);
  if (hit == m_transOptCache.end())
    return NULL;
  hit->second.second = clock(); // refresh the last-used timestamp
  return hit->second.first;
}
void StaticData : : ReduceTransOptCache ( ) const
{
2011-02-24 16:14:42 +03:00
if ( m_transOptCache . size ( ) < = m_transOptCacheMaxSize ) return ; // not full
clock_t t = clock ( ) ;
// find cutoff for last used time
priority_queue < clock_t > lastUsedTimes ;
std : : map < std : : pair < size_t , Phrase > , std : : pair < TranslationOptionList * , clock_t > > : : iterator iter ;
iter = m_transOptCache . begin ( ) ;
while ( iter ! = m_transOptCache . end ( ) ) {
lastUsedTimes . push ( iter - > second . second ) ;
iter + + ;
}
for ( size_t i = 0 ; i < lastUsedTimes . size ( ) - m_transOptCacheMaxSize / 2 ; i + + )
lastUsedTimes . pop ( ) ;
clock_t cutoffLastUsedTime = lastUsedTimes . top ( ) ;
// remove all old entries
iter = m_transOptCache . begin ( ) ;
while ( iter ! = m_transOptCache . end ( ) ) {
if ( iter - > second . second < cutoffLastUsedTime ) {
std : : map < std : : pair < size_t , Phrase > , std : : pair < TranslationOptionList * , clock_t > > : : iterator iterRemove = iter + + ;
delete iterRemove - > second . first ;
m_transOptCache . erase ( iterRemove ) ;
} else iter + + ;
}
VERBOSE ( 2 , " Reduced persistent translation option cache in " < < ( ( clock ( ) - t ) / ( float ) CLOCKS_PER_SEC ) < < " seconds. " < < std : : endl ) ;
2008-06-11 14:52:57 +04:00
}
2008-10-14 23:25:18 +04:00
void StaticData : : AddTransOptListToCache ( const DecodeGraph & decodeGraph , const Phrase & sourcePhrase , const TranslationOptionList & transOptList ) const
{
2011-02-24 16:14:42 +03:00
if ( m_transOptCacheMaxSize = = 0 ) return ;
std : : pair < size_t , Phrase > key ( decodeGraph . GetPosition ( ) , sourcePhrase ) ;
TranslationOptionList * storedTransOptList = new TranslationOptionList ( transOptList ) ;
# ifdef WITH_THREADS
boost : : mutex : : scoped_lock lock ( m_transOptCacheMutex ) ;
2009-08-07 20:47:54 +04:00
# endif
2011-02-24 16:14:42 +03:00
m_transOptCache [ key ] = make_pair ( storedTransOptList , clock ( ) ) ;
ReduceTransOptCache ( ) ;
2008-10-14 23:25:18 +04:00
}
2011-05-31 13:42:27 +04:00
void StaticData : : ClearTransOptionCache ( ) const {
map < std : : pair < size_t , Phrase > , std : : pair < TranslationOptionList * , clock_t > > : : iterator iterCache ;
for ( iterCache = m_transOptCache . begin ( ) ; iterCache ! = m_transOptCache . end ( ) ; + + iterCache ) {
TranslationOptionList * transOptList = iterCache - > second . first ;
delete transOptList ;
}
}
2008-10-14 23:25:18 +04:00
2010-09-14 13:42:37 +04:00
void StaticData::ReLoadParameter()
{
  // Re-read feature weights from m_parameter and push them onto the
  // already-constructed feature functions (presumably to pick up weights
  // changed by an external tuning process -- TODO confirm caller contract).
  m_verboseLevel = 1;
  if (m_parameter->GetParam("verbose").size() == 1) {
    m_verboseLevel = Scan<size_t>(m_parameter->GetParam("verbose")[0]);
  }

  // check whether "weight-u" is already set; if declared but empty, fall
  // back to a single weight of "1.0"
  if (m_parameter->isParamShortNameSpecified("u")) {
    if (m_parameter->GetParamShortName("u").size() < 1) {
      PARAM_VEC w(1,"1.0");
      m_parameter->OverwriteParamShortName("u", w);
    }
  }

  // loop over all ScoreProducers to update weights
  const TranslationSystem &transSystem = GetTranslationSystem(TranslationSystem::DEFAULT);
  std::vector<const ScoreProducer*>::const_iterator iterSP;
  for (iterSP = transSystem.GetFeatureFunctions().begin(); iterSP != transSystem.GetFeatureFunctions().end(); ++iterSP) {
    std::string paramShortName = (*iterSP)->GetScoreProducerWeightShortName();
    vector<float> Weights = Scan<float>(m_parameter->GetParamShortName(paramShortName));

    if (paramShortName == "d") { // basic distortion model takes the first weight
      if ((*iterSP)->GetScoreProducerDescription() == "Distortion") {
        Weights.resize(1); // take only the first element
      } else { // lexicalized reordering model takes the others
        Weights.erase(Weights.begin()); // remove the first element
      }
    } else if (paramShortName == "tm") {
      // translation model weights are distributed separately below, because
      // one flat "tm" list is shared by all phrase dictionaries
      continue;
    }
    SetWeights(*iterSP, Weights);
  }

  // distribute the flat "tm" weight list across the phrase dictionaries,
  // giving each dictionary as many weights as it has score components
  const vector<float> WeightsTM = Scan<float>(m_parameter->GetParamShortName("tm"));
  // NOTE(review): WeightsLM is read here but never applied in this function
  const vector<float> WeightsLM = Scan<float>(m_parameter->GetParamShortName("lm"));
  size_t index_WeightTM = 0;
  for (size_t i = 0; i < transSystem.GetPhraseDictionaries().size(); ++i) {
    PhraseDictionaryFeature &phraseDictionaryFeature = *m_phraseDictionary[i];
    vector<float> tmp_weights;
    for (size_t j = 0; j < phraseDictionaryFeature.GetNumScoreComponents(); ++j)
      tmp_weights.push_back(WeightsTM[index_WeightTM++]);
    SetWeights(&phraseDictionaryFeature, tmp_weights);
  }
}
2010-09-17 18:25:08 +04:00
2011-11-16 13:13:17 +04:00
void StaticData : : ReLoadBleuScoreFeatureParameter ( float weight )
2010-11-24 20:06:54 +03:00
{
2011-11-16 13:13:17 +04:00
//loop over ScoreProducers to update weights of BleuScoreFeature
2011-08-19 20:09:36 +04:00
const TranslationSystem & transSystem = GetTranslationSystem ( TranslationSystem : : DEFAULT ) ;
std : : vector < const ScoreProducer * > : : const_iterator iterSP ;
for ( iterSP = transSystem . GetFeatureFunctions ( ) . begin ( ) ; iterSP ! = transSystem . GetFeatureFunctions ( ) . end ( ) ; + + iterSP ) {
std : : string paramShortName = ( * iterSP ) - > GetScoreProducerWeightShortName ( ) ;
if ( paramShortName = = " bl " ) {
2011-11-16 13:13:17 +04:00
SetWeight ( * iterSP , weight ) ;
break ;
2011-08-19 20:09:36 +04:00
}
}
2010-11-24 20:06:54 +03:00
}
2010-09-17 18:25:08 +04:00
// ScoreComponentCollection StaticData::GetAllWeightsScoreComponentCollection() const {}
// in ScoreComponentCollection.h
2011-08-19 20:09:36 +04:00
2008-10-09 03:51:26 +04:00
}