2008-06-11 14:52:57 +04:00
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase - based language decoder
Copyright ( C ) 2006 University of Edinburgh
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2.1 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2013-09-06 10:53:17 +04:00
2008-06-11 14:52:57 +04:00
# include <string>
2011-11-18 16:07:41 +04:00
# include "util/check.hh"
2013-07-19 01:54:52 +04:00
# include "moses/FF/Factory.h"
# include "moses/FF/WordPenaltyProducer.h"
# include "moses/FF/UnknownWordPenaltyProducer.h"
# include "moses/FF/InputFeature.h"
2013-05-31 18:02:34 +04:00
2008-06-11 14:52:57 +04:00
# include "DecodeStepTranslation.h"
# include "DecodeStepGeneration.h"
# include "GenerationDictionary.h"
# include "StaticData.h"
# include "Util.h"
# include "FactorCollection.h"
# include "Timer.h"
# include "SentenceStats.h"
# include "UserMessage.h"
# include "TranslationOption.h"
# include "DecodeGraph.h"
2008-06-19 03:14:09 +04:00
# include "InputFileStream.h"
2010-09-17 18:25:08 +04:00
# include "ScoreComponentCollection.h"
2013-05-10 14:11:47 +04:00
2011-09-23 02:29:56 +04:00
# ifdef WITH_THREADS
# include <boost/thread.hpp>
# endif
2008-06-11 14:52:57 +04:00
using namespace std ;
2008-10-09 03:51:26 +04:00
namespace Moses
{
2013-01-01 21:27:26 +04:00
2008-06-11 14:52:57 +04:00
// Definition of the static StaticData::s_instance member; LoadDataStatic()
// below calls SetExecPath() and LoadData() on this object.
StaticData StaticData : : s_instance ;
/**
 * Default constructor.
 *
 * Gives the members named in the initializer list their start-up values,
 * sets the opening/closing XML bracket strings and initialises the Phrase
 * memory pool.
 */
StaticData::StaticData() :
  m_sourceStartPosMattersForRecombination(false),
  m_inputType(SentenceInput),
  m_wpProducer(NULL),
  m_unknownWordPenaltyProducer(NULL),
  m_inputFeature(NULL),
  m_detailedTranslationReportingFilePath(),
  m_onlyDistinctNBest(false),
  m_needAlignmentInfo(false),
  m_factorDelimiter(" | "),   // delimiter between factors used when none is configured
  m_lmEnableOOVFeature(false),
  m_isAlwaysCreateDirectTranslationOption(false),
  m_currentWeightSetting(" default ")
{
  // bracket strings used for XML markup
  m_xmlBrackets.first = " < ";
  m_xmlBrackets.second = " > ";

  // set up the memory pool used by Phrase
  Phrase::InitializeMemPool();
}
2011-02-24 16:14:42 +03:00
2013-05-22 19:19:31 +04:00
/**
 * Destructor.
 *
 * Hands m_decodeGraphs to RemoveAllInColl() (presumably deleting the
 * contained graphs -- confirm against Util.h) and finalises the Phrase
 * memory pool.
 */
StaticData::~StaticData()
{
  RemoveAllInColl(m_decodeGraphs);

  /* Disabled: explicit deletion of every registered feature function.
  const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
  for (size_t i = 0; i < producers.size(); ++i) {
    FeatureFunction *ff = producers[i];
    delete ff;
  }
  */

  // tear down the memory pool used by Phrase
  Phrase::FinalizeMemPool();
}
2013-05-29 21:16:15 +04:00
bool StaticData : : LoadDataStatic ( Parameter * parameter , const std : : string & execPath )
{
2012-07-31 00:07:19 +04:00
s_instance . SetExecPath ( execPath ) ;
return s_instance . LoadData ( parameter ) ;
2012-04-29 08:37:48 +04:00
}
2008-06-11 14:52:57 +04:00
/**
 * Reads the decoder configuration from the given Parameter object into the
 * members of this StaticData instance.
 *
 * The function stores the pointer in m_parameter, then walks through the
 * individual options (verbosity, search/parsing algorithm, input type,
 * alignment and n-best output, search-graph output, factors, pruning limits,
 * MBR / lattice MBR / consensus decoding, threading, XML input handling,
 * placeholder factors), constructs the configured feature functions, loads
 * the decode graphs, checks the weights and finally merges optional extra
 * and alternate weight settings.
 *
 * @param parameter  parameter object queried through GetParam() /
 *                   isParamSpecified(); kept in m_parameter.
 * @return false when one of the validated options is malformed (a message
 *         is registered through UserMessage::Add first) or when
 *         LoadDecodeGraphs(), CheckWeights() or LoadAlternateWeightSettings()
 *         fails; true otherwise. Note that a few conflicts below terminate
 *         the process with exit(1) instead of returning.
 */
bool StaticData : : LoadData ( Parameter * parameter )
{
2011-02-24 16:14:42 +03:00
ResetUserTime ( ) ;
m_parameter = parameter ;
// verbose level
// default is 1; it is only replaced when exactly one value was supplied
m_verboseLevel = 1 ;
if ( m_parameter - > GetParam ( " verbose " ) . size ( ) = = 1 ) {
m_verboseLevel = Scan < size_t > ( m_parameter - > GetParam ( " verbose " ) [ 0 ] ) ;
2008-06-11 14:52:57 +04:00
}
2012-01-26 15:38:40 +04:00
m_parsingAlgorithm = ( m_parameter - > GetParam ( " parsing-algorithm " ) . size ( ) > 0 ) ?
2013-05-29 21:16:15 +04:00
( ParsingAlgorithm ) Scan < size_t > ( m_parameter - > GetParam ( " parsing-algorithm " ) [ 0 ] ) : ParseCYKPlus ;
2012-01-26 15:38:40 +04:00
2011-02-24 16:14:42 +03:00
// to cube or not to cube
m_searchAlgorithm = ( m_parameter - > GetParam ( " search-algorithm " ) . size ( ) > 0 ) ?
( SearchAlgorithm ) Scan < size_t > ( m_parameter - > GetParam ( " search-algorithm " ) [ 0 ] ) : Normal ;
2012-10-12 17:09:45 +04:00
if ( IsChart ( ) )
2011-02-24 16:14:42 +03:00
LoadChartDecodingParameters ( ) ;
// input type has to be specified BEFORE loading the phrase tables!
if ( m_parameter - > GetParam ( " inputtype " ) . size ( ) )
m_inputType = ( InputTypeEnum ) Scan < int > ( m_parameter - > GetParam ( " inputtype " ) [ 0 ] ) ;
// human-readable name of the input type, used only for the VERBOSE message below
std : : string s_it = " text input " ;
if ( m_inputType = = 1 ) {
s_it = " confusion net " ;
}
if ( m_inputType = = 2 ) {
s_it = " word lattice " ;
}
VERBOSE ( 2 , " input type is: " < < s_it < < " \n " ) ;
if ( m_parameter - > GetParam ( " recover-input-path " ) . size ( ) ) {
m_recoverPath = Scan < bool > ( m_parameter - > GetParam ( " recover-input-path " ) [ 0 ] ) ;
// the option is silently switched off again (with a trace message) for plain sentence input
if ( m_recoverPath & & m_inputType = = SentenceInput ) {
TRACE_ERR ( " --recover-input-path should only be used with confusion net or word lattice input! \n " ) ;
m_recoverPath = false ;
2011-02-03 12:08:42 +03:00
}
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
// factor delimiter
if ( m_parameter - > GetParam ( " factor-delimiter " ) . size ( ) > 0 ) {
m_factorDelimiter = m_parameter - > GetParam ( " factor-delimiter " ) [ 0 ] ;
}
SetBooleanParameter ( & m_continuePartialTranslation , " continue-partial-translation " , false ) ;
2012-11-14 23:01:25 +04:00
SetBooleanParameter ( & m_outputHypoScore , " output-hypo-score " , false ) ;
2011-02-24 16:14:42 +03:00
//word-to-word alignment
2013-03-13 16:12:33 +04:00
// alignments
// each of the three alignment-related outputs below sets m_needAlignmentInfo
SetBooleanParameter ( & m_PrintAlignmentInfo , " print-alignment-info " , false ) ;
if ( m_PrintAlignmentInfo ) {
m_needAlignmentInfo = true ;
}
if ( m_parameter - > GetParam ( " sort-word-alignment " ) . size ( ) ) {
m_wordAlignmentSort = ( WordAlignmentSort ) Scan < size_t > ( m_parameter - > GetParam ( " sort-word-alignment " ) [ 0 ] ) ;
}
2011-02-24 16:14:42 +03:00
SetBooleanParameter ( & m_PrintAlignmentInfoNbest , " print-alignment-info-in-n-best " , false ) ;
2012-11-14 23:01:25 +04:00
if ( m_PrintAlignmentInfoNbest ) {
m_needAlignmentInfo = true ;
2011-02-24 16:14:42 +03:00
}
if ( m_parameter - > GetParam ( " alignment-output-file " ) . size ( ) > 0 ) {
m_alignmentOutputFile = Scan < std : : string > ( m_parameter - > GetParam ( " alignment-output-file " ) [ 0 ] ) ;
2012-11-14 23:01:25 +04:00
m_needAlignmentInfo = true ;
2011-02-24 16:14:42 +03:00
}
// n-best
// expected values: file path, list size and an optional third value "distinct"
if ( m_parameter - > GetParam ( " n-best-list " ) . size ( ) > = 2 ) {
m_nBestFilePath = m_parameter - > GetParam ( " n-best-list " ) [ 0 ] ;
m_nBestSize = Scan < size_t > ( m_parameter - > GetParam ( " n-best-list " ) [ 1 ] ) ;
m_onlyDistinctNBest = ( m_parameter - > GetParam ( " n-best-list " ) . size ( ) > 2 & & m_parameter - > GetParam ( " n-best-list " ) [ 2 ] = = " distinct " ) ;
} else if ( m_parameter - > GetParam ( " n-best-list " ) . size ( ) = = 1 ) {
2011-10-04 19:46:24 +04:00
UserMessage : : Add ( string ( " wrong format for switch -n-best-list file size " ) ) ;
2011-02-24 16:14:42 +03:00
return false ;
} else {
m_nBestSize = 0 ;
}
if ( m_parameter - > GetParam ( " n-best-factor " ) . size ( ) > 0 ) {
m_nBestFactor = Scan < size_t > ( m_parameter - > GetParam ( " n-best-factor " ) [ 0 ] ) ;
} else {
m_nBestFactor = 20 ;
2008-06-11 14:52:57 +04:00
}
2013-05-29 21:16:15 +04:00
2011-10-04 19:46:24 +04:00
//lattice samples
// exactly two values (file path, sample count) are accepted; any other non-zero count is an error
if ( m_parameter - > GetParam ( " lattice-samples " ) . size ( ) = = 2 ) {
m_latticeSamplesFilePath = m_parameter - > GetParam ( " lattice-samples " ) [ 0 ] ;
m_latticeSamplesSize = Scan < size_t > ( m_parameter - > GetParam ( " lattice-samples " ) [ 1 ] ) ;
} else if ( m_parameter - > GetParam ( " lattice-samples " ) . size ( ) ! = 0 ) {
UserMessage : : Add ( string ( " wrong format for switch -lattice-samples file size " ) ) ;
return false ;
} else {
m_latticeSamplesSize = 0 ;
}
2011-02-24 16:14:42 +03:00
// word graph
// enabled only when exactly two values are given; any other count leaves it off without an error
if ( m_parameter - > GetParam ( " output-word-graph " ) . size ( ) = = 2 )
m_outputWordGraph = true ;
else
m_outputWordGraph = false ;
// search graph
if ( m_parameter - > GetParam ( " output-search-graph " ) . size ( ) > 0 ) {
if ( m_parameter - > GetParam ( " output-search-graph " ) . size ( ) ! = 1 ) {
UserMessage : : Add ( string ( " ERROR: wrong format for switch -output-search-graph file " ) ) ;
return false ;
2011-08-18 01:13:21 +04:00
}
2011-02-24 16:14:42 +03:00
m_outputSearchGraph = true ;
}
// ... in extended format
else if ( m_parameter - > GetParam ( " output-search-graph-extended " ) . size ( ) > 0 ) {
if ( m_parameter - > GetParam ( " output-search-graph-extended " ) . size ( ) ! = 1 ) {
UserMessage : : Add ( string ( " ERROR: wrong format for switch -output-search-graph-extended file " ) ) ;
return false ;
}
m_outputSearchGraph = true ;
m_outputSearchGraphExtended = true ;
2013-02-15 22:06:54 +04:00
} else {
2011-02-24 16:14:42 +03:00
m_outputSearchGraph = false ;
2013-02-15 22:06:54 +04:00
}
if ( m_parameter - > GetParam ( " output-search-graph-slf " ) . size ( ) > 0 ) {
m_outputSearchGraphSLF = true ;
2013-02-22 21:24:35 +04:00
} else {
m_outputSearchGraphSLF = false ;
}
if ( m_parameter - > GetParam ( " output-search-graph-hypergraph " ) . size ( ) > 0 ) {
m_outputSearchGraphHypergraph = true ;
} else {
m_outputSearchGraphHypergraph = false ;
2013-02-15 22:06:54 +04:00
}
2008-09-24 20:48:23 +04:00
# ifdef HAVE_PROTOBUF
2011-02-24 16:14:42 +03:00
if ( m_parameter - > GetParam ( " output-search-graph-pb " ) . size ( ) > 0 ) {
if ( m_parameter - > GetParam ( " output-search-graph-pb " ) . size ( ) ! = 1 ) {
UserMessage : : Add ( string ( " ERROR: wrong format for switch -output-search-graph-pb path " ) ) ;
return false ;
}
m_outputSearchGraphPB = true ;
} else
m_outputSearchGraphPB = false ;
2008-09-24 20:48:23 +04:00
# endif
2012-09-03 10:23:32 +04:00
SetBooleanParameter ( & m_unprunedSearchGraph , " unpruned-search-graph " , false ) ;
SetBooleanParameter ( & m_includeLHSInSearchGraph , " include-lhs-in-search-graph " , false ) ;
2013-05-29 21:16:15 +04:00
2012-09-21 11:55:37 +04:00
if ( m_parameter - > isParamSpecified ( " output-unknowns " ) ) {
if ( m_parameter - > GetParam ( " output-unknowns " ) . size ( ) = = 1 ) {
2013-05-29 21:16:15 +04:00
m_outputUnknownsFile = Scan < string > ( m_parameter - > GetParam ( " output-unknowns " ) [ 0 ] ) ;
2012-09-21 11:55:37 +04:00
} else {
UserMessage : : Add ( string ( " need to specify exactly one file name for unknowns " ) ) ;
return false ;
}
}
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
// include feature names in the n-best list
SetBooleanParameter ( & m_labeledNBestList , " labeled-n-best-list " , true ) ;
// include word alignment in the n-best list
2012-11-14 20:47:16 +04:00
SetBooleanParameter ( & m_nBestIncludesSegmentation , " include-segmentation-in-n-best " , false ) ;
2011-02-24 16:14:42 +03:00
// printing source phrase spans
SetBooleanParameter ( & m_reportSegmentation , " report-segmentation " , false ) ;
2013-08-07 08:31:45 +04:00
SetBooleanParameter ( & m_reportSegmentationEnriched , " report-segmentation-enriched " , false ) ;
2011-02-24 16:14:42 +03:00
// print all factors of output translations
SetBooleanParameter ( & m_reportAllFactors , " report-all-factors " , false ) ;
// print all factors of output translations
SetBooleanParameter ( & m_reportAllFactorsNBest , " report-all-factors-in-n-best " , false ) ;
//input factors
// at least one input factor is mandatory; loading fails otherwise
const vector < string > & inputFactorVector = m_parameter - > GetParam ( " input-factors " ) ;
for ( size_t i = 0 ; i < inputFactorVector . size ( ) ; i + + ) {
m_inputFactorOrder . push_back ( Scan < FactorType > ( inputFactorVector [ i ] ) ) ;
}
if ( m_inputFactorOrder . empty ( ) ) {
UserMessage : : Add ( string ( " no input factor specified in config file " ) ) ;
return false ;
}
//output factors
const vector < string > & outputFactorVector = m_parameter - > GetParam ( " output-factors " ) ;
for ( size_t i = 0 ; i < outputFactorVector . size ( ) ; i + + ) {
m_outputFactorOrder . push_back ( Scan < FactorType > ( outputFactorVector [ i ] ) ) ;
}
if ( m_outputFactorOrder . empty ( ) ) {
// default. output factor 0
m_outputFactorOrder . push_back ( 0 ) ;
}
//source word deletion
SetBooleanParameter ( & m_wordDeletionEnabled , " phrase-drop-allowed " , false ) ;
2008-06-11 14:52:57 +04:00
2010-03-07 10:57:48 +03:00
//Disable discarding
SetBooleanParameter ( & m_disableDiscarding , " disable-discarding " , false ) ;
2011-02-24 16:14:42 +03:00
2010-03-07 10:57:48 +03:00
//Print All Derivations
SetBooleanParameter ( & m_printAllDerivations , " print-all-derivations " , false ) ;
2011-02-24 16:14:42 +03:00
// additional output
if ( m_parameter - > isParamSpecified ( " translation-details " ) ) {
2010-05-08 19:51:59 +04:00
const vector < string > & args = m_parameter - > GetParam ( " translation-details " ) ;
2011-02-24 16:14:42 +03:00
if ( args . size ( ) = = 1 ) {
2010-05-08 19:51:59 +04:00
m_detailedTranslationReportingFilePath = args [ 0 ] ;
2011-02-24 16:14:42 +03:00
} else {
2010-05-08 19:51:59 +04:00
UserMessage : : Add ( string ( " the translation-details option requires exactly one filename argument " ) ) ;
return false ;
}
}
2008-06-11 14:52:57 +04:00
2011-02-24 16:14:42 +03:00
// reordering constraints
// -1 is stored when no distortion limit is configured
m_maxDistortion = ( m_parameter - > GetParam ( " distortion-limit " ) . size ( ) > 0 ) ?
Scan < int > ( m_parameter - > GetParam ( " distortion-limit " ) [ 0 ] )
: - 1 ;
SetBooleanParameter ( & m_reorderingConstraint , " monotone-at-punctuation " , false ) ;
// settings for pruning
m_maxHypoStackSize = ( m_parameter - > GetParam ( " stack " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " stack " ) [ 0 ] ) : DEFAULT_MAX_HYPOSTACK_SIZE ;
2012-10-25 17:14:15 +04:00
2011-02-24 16:14:42 +03:00
m_minHypoStackDiversity = 0 ;
// stack diversity is rejected for distortion limits above 15 and for word-lattice input
if ( m_parameter - > GetParam ( " stack-diversity " ) . size ( ) > 0 ) {
if ( m_maxDistortion > 15 ) {
UserMessage : : Add ( " stack diversity > 0 is not allowed for distortion limits larger than 15 " ) ;
return false ;
}
if ( m_inputType = = WordLatticeInput ) {
UserMessage : : Add ( " stack diversity > 0 is not allowed for lattice input " ) ;
return false ;
}
m_minHypoStackDiversity = Scan < size_t > ( m_parameter - > GetParam ( " stack-diversity " ) [ 0 ] ) ;
}
// the three thresholds below are passed through TransformScore() before being stored
m_beamWidth = ( m_parameter - > GetParam ( " beam-threshold " ) . size ( ) > 0 ) ?
TransformScore ( Scan < float > ( m_parameter - > GetParam ( " beam-threshold " ) [ 0 ] ) )
: TransformScore ( DEFAULT_BEAM_WIDTH ) ;
m_earlyDiscardingThreshold = ( m_parameter - > GetParam ( " early-discarding-threshold " ) . size ( ) > 0 ) ?
TransformScore ( Scan < float > ( m_parameter - > GetParam ( " early-discarding-threshold " ) [ 0 ] ) )
: TransformScore ( DEFAULT_EARLY_DISCARDING_THRESHOLD ) ;
m_translationOptionThreshold = ( m_parameter - > GetParam ( " translation-option-threshold " ) . size ( ) > 0 ) ?
TransformScore ( Scan < float > ( m_parameter - > GetParam ( " translation-option-threshold " ) [ 0 ] ) )
: TransformScore ( DEFAULT_TRANSLATION_OPTION_THRESHOLD ) ;
m_maxNoTransOptPerCoverage = ( m_parameter - > GetParam ( " max-trans-opt-per-coverage " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " max-trans-opt-per-coverage " ) [ 0 ] ) : DEFAULT_MAX_TRANS_OPT_SIZE ;
m_maxNoPartTransOpt = ( m_parameter - > GetParam ( " max-partial-trans-opt " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " max-partial-trans-opt " ) [ 0 ] ) : DEFAULT_MAX_PART_TRANS_OPT_SIZE ;
m_maxPhraseLength = ( m_parameter - > GetParam ( " max-phrase-length " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " max-phrase-length " ) [ 0 ] ) : DEFAULT_MAX_PHRASE_LENGTH ;
m_cubePruningPopLimit = ( m_parameter - > GetParam ( " cube-pruning-pop-limit " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " cube-pruning-pop-limit " ) [ 0 ] ) : DEFAULT_CUBE_PRUNING_POP_LIMIT ;
m_cubePruningDiversity = ( m_parameter - > GetParam ( " cube-pruning-diversity " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " cube-pruning-diversity " ) [ 0 ] ) : DEFAULT_CUBE_PRUNING_DIVERSITY ;
2011-06-27 19:13:15 +04:00
SetBooleanParameter ( & m_cubePruningLazyScoring , " cube-pruning-lazy-scoring " , false ) ;
2012-09-13 13:43:01 +04:00
// early distortion cost
SetBooleanParameter ( & m_useEarlyDistortionCost , " early-distortion-cost " , false ) ;
2012-06-21 19:41:05 +04:00
2011-02-24 16:14:42 +03:00
// unknown word processing
SetBooleanParameter ( & m_dropUnknown , " drop-unknown " , false ) ;
2013-08-15 18:56:20 +04:00
SetBooleanParameter ( & m_markUnknown , " mark-unknown " , false ) ;
2011-02-24 16:14:42 +03:00
2011-09-09 22:03:00 +04:00
SetBooleanParameter ( & m_lmEnableOOVFeature , " lmodel-oov-feature " , false ) ;
2011-02-24 16:14:42 +03:00
// minimum Bayes risk decoding
SetBooleanParameter ( & m_mbr , " minimum-bayes-risk " , false ) ;
2010-02-03 13:23:32 +03:00
m_mbrSize = ( m_parameter - > GetParam ( " mbr-size " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < size_t > ( m_parameter - > GetParam ( " mbr-size " ) [ 0 ] ) : 200 ;
m_mbrScale = ( m_parameter - > GetParam ( " mbr-scale " ) . size ( ) > 0 ) ?
Scan < float > ( m_parameter - > GetParam ( " mbr-scale " ) [ 0 ] ) : 1.0f ;
2008-06-11 14:52:57 +04:00
2010-02-03 13:23:32 +03:00
//lattice mbr
SetBooleanParameter ( & m_useLatticeMBR , " lminimum-bayes-risk " , false ) ;
// NOTE(review): this conflict ends the process with exit(1) instead of
// returning false like the option errors above, and the message spells
// "Errror" -- left untouched here because it is runtime text.
2010-04-12 13:51:29 +04:00
if ( m_useLatticeMBR & & m_mbr ) {
2011-02-24 16:14:42 +03:00
cerr < < " Errror: Cannot use both n-best mbr and lattice mbr together " < < endl ;
exit ( 1 ) ;
2010-04-12 13:51:29 +04:00
}
2013-05-29 21:16:15 +04:00
2012-05-28 10:03:45 +04:00
//mira training
SetBooleanParameter ( & m_mira , " mira " , false ) ;
2011-02-24 16:14:42 +03:00
2013-06-05 17:06:04 +04:00
// lattice MBR
// selecting lattice MBR also turns on the general m_mbr flag
2010-04-12 13:51:29 +04:00
if ( m_useLatticeMBR ) m_mbr = true ;
2011-02-24 16:14:42 +03:00
2010-02-09 14:37:33 +03:00
m_lmbrPruning = ( m_parameter - > GetParam ( " lmbr-pruning-factor " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < size_t > ( m_parameter - > GetParam ( " lmbr-pruning-factor " ) [ 0 ] ) : 30 ;
2010-02-03 13:23:32 +03:00
m_lmbrThetas = Scan < float > ( m_parameter - > GetParam ( " lmbr-thetas " ) ) ;
2010-02-03 14:20:20 +03:00
SetBooleanParameter ( & m_useLatticeHypSetForLatticeMBR , " lattice-hypo-set " , false ) ;
2010-02-03 22:46:35 +03:00
m_lmbrPrecision = ( m_parameter - > GetParam ( " lmbr-p " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < float > ( m_parameter - > GetParam ( " lmbr-p " ) [ 0 ] ) : 0.8f ;
2010-02-03 22:46:35 +03:00
m_lmbrPRatio = ( m_parameter - > GetParam ( " lmbr-r " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < float > ( m_parameter - > GetParam ( " lmbr-r " ) [ 0 ] ) : 0.6f ;
2010-03-14 23:23:17 +03:00
m_lmbrMapWeight = ( m_parameter - > GetParam ( " lmbr-map-weight " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < float > ( m_parameter - > GetParam ( " lmbr-map-weight " ) [ 0 ] ) : 0.0f ;
//consensus decoding
// m_mbr is already true here if either MBR variant was selected, so consensus
// decoding is rejected in combination with both; this path also uses exit(1)
2010-04-12 13:51:29 +04:00
SetBooleanParameter ( & m_useConsensusDecoding , " consensus-decoding " , false ) ;
if ( m_useConsensusDecoding & & m_mbr ) {
2011-02-24 16:14:42 +03:00
cerr < < " Error: Cannot use consensus decoding together with mbr " < < endl ;
exit ( 1 ) ;
2010-04-12 13:51:29 +04:00
}
2011-02-24 16:14:42 +03:00
if ( m_useConsensusDecoding ) m_mbr = true ;
2013-05-29 21:16:15 +04:00
2012-08-03 18:38:45 +04:00
// Compact phrase table and reordering model
2012-08-03 14:04:39 +04:00
SetBooleanParameter ( & m_minphrMemory , " minphr-memory " , false ) ;
SetBooleanParameter ( & m_minlexrMemory , " minlexr-memory " , false ) ;
2011-02-24 16:14:42 +03:00
// without a configured value -1 is stored; m_timeout is then set from
// comparing GetTimeoutThreshold() against (size_t)-1
m_timeout_threshold = ( m_parameter - > GetParam ( " time-out " ) . size ( ) > 0 ) ?
Scan < size_t > ( m_parameter - > GetParam ( " time-out " ) [ 0 ] ) : - 1 ;
m_timeout = ( GetTimeoutThreshold ( ) = = ( size_t ) - 1 ) ? false : true ;
2008-06-11 14:52:57 +04:00
2010-04-23 19:01:06 +04:00
m_lmcache_cleanup_threshold = ( m_parameter - > GetParam ( " clean-lm-cache " ) . size ( ) > 0 ) ?
2011-02-24 16:14:42 +03:00
Scan < size_t > ( m_parameter - > GetParam ( " clean-lm-cache " ) [ 0 ] ) : 1 ;
2010-04-23 19:01:06 +04:00
2011-09-23 02:29:56 +04:00
// thread count: defaults to 1; "all" asks Boost for the hardware concurrency,
// any other value is parsed as an integer and must be at least 1
m_threadCount = 1 ;
const std : : vector < std : : string > & threadInfo = m_parameter - > GetParam ( " threads " ) ;
if ( ! threadInfo . empty ( ) ) {
if ( threadInfo [ 0 ] = = " all " ) {
# ifdef WITH_THREADS
m_threadCount = boost : : thread : : hardware_concurrency ( ) ;
if ( ! m_threadCount ) {
UserMessage : : Add ( " -threads all specified but Boost doesn't know how many cores there are " ) ;
return false ;
}
# else
UserMessage : : Add ( " -threads all specified but moses not built with thread support " ) ;
return false ;
# endif
} else {
m_threadCount = Scan < int > ( threadInfo [ 0 ] ) ;
if ( m_threadCount < 1 ) {
UserMessage : : Add ( " Specify at least one thread. " ) ;
return false ;
}
# ifndef WITH_THREADS
if ( m_threadCount > 1 ) {
UserMessage : : Add ( std : : string ( " Error: Thread count of " ) + threadInfo [ 0 ] + " but moses not built with thread support " ) ;
return false ;
}
# endif
}
}
2011-11-13 21:14:40 +04:00
m_startTranslationId = ( m_parameter - > GetParam ( " start-translation-id " ) . size ( ) > 0 ) ?
2013-05-29 21:16:15 +04:00
Scan < long > ( m_parameter - > GetParam ( " start-translation-id " ) [ 0 ] ) : 0 ;
2011-11-13 21:14:40 +04:00
2011-02-24 16:14:42 +03:00
// use of xml in input
// no value selects XmlPassThrough; an unrecognised value is an error
if ( m_parameter - > GetParam ( " xml-input " ) . size ( ) = = 0 ) m_xmlInputType = XmlPassThrough ;
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " exclusive " ) m_xmlInputType = XmlExclusive ;
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " inclusive " ) m_xmlInputType = XmlInclusive ;
2013-08-15 14:46:45 +04:00
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " constraint " ) m_xmlInputType = XmlConstraint ;
2011-02-24 16:14:42 +03:00
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " ignore " ) m_xmlInputType = XmlIgnore ;
else if ( m_parameter - > GetParam ( " xml-input " ) [ 0 ] = = " pass-through " ) m_xmlInputType = XmlPassThrough ;
else {
2013-08-15 14:46:45 +04:00
UserMessage : : Add ( " invalid xml-input value, must be pass-through, exclusive, inclusive, constraint, or ignore " ) ;
2011-02-24 16:14:42 +03:00
return false ;
}
2011-11-16 16:38:22 +04:00
// specify XML tags opening and closing brackets for XML option
// NOTE(review): a malformed value ends the process with exit(1) rather than returning false
if ( m_parameter - > GetParam ( " xml-brackets " ) . size ( ) > 0 ) {
2013-05-29 21:16:15 +04:00
std : : vector < std : : string > brackets = Tokenize ( m_parameter - > GetParam ( " xml-brackets " ) [ 0 ] ) ;
if ( brackets . size ( ) ! = 2 ) {
cerr < < " invalid xml-brackets value, must specify exactly 2 blank-delimited strings for XML tags opening and closing brackets " < < endl ;
exit ( 1 ) ;
}
m_xmlBrackets . first = brackets [ 0 ] ;
m_xmlBrackets . second = brackets [ 1 ] ;
cerr < < " XML tags opening and closing brackets for XML input are: " < < m_xmlBrackets . first < < " and " < < m_xmlBrackets . second < < endl ;
2011-11-16 16:38:22 +04:00
}
2013-07-18 20:00:07 +04:00
// placeholder factors: exactly two values are required when the option is present
// (enforced with CHECK); otherwise both entries are set to NOT_FOUND
if ( m_parameter - > GetParam ( " placeholder-factor " ) . size ( ) > 0 ) {
2013-07-30 18:04:37 +04:00
CHECK ( m_parameter - > GetParam ( " placeholder-factor " ) . size ( ) = = 2 ) ;
2013-07-19 21:23:38 +04:00
m_placeHolderFactor = std : : pair < FactorType , FactorType > (
2013-07-30 18:04:37 +04:00
Scan < FactorType > ( m_parameter - > GetParam ( " placeholder-factor " ) [ 0 ] ) ,
Scan < FactorType > ( m_parameter - > GetParam ( " placeholder-factor " ) [ 1 ] )
) ;
2013-07-18 20:00:44 +04:00
} else {
2013-07-19 21:23:38 +04:00
m_placeHolderFactor = std : : pair < FactorType , FactorType > ( NOT_FOUND , NOT_FOUND ) ;
2013-07-18 20:00:07 +04:00
}
2012-12-31 20:41:33 +04:00
// all features
// NOTE(review): featureIndexMap is declared but not used anywhere in this function
2013-01-01 21:27:26 +04:00
map < string , int > featureIndexMap ;
2012-12-31 20:41:33 +04:00
const vector < string > & features = m_parameter - > GetParam ( " feature " ) ;
2013-07-19 01:54:52 +04:00
FeatureRegistry registry ;
2012-12-31 20:41:33 +04:00
// each non-empty feature line is handed to the registry, keyed by its first token
for ( size_t i = 0 ; i < features . size ( ) ; + + i ) {
2013-02-08 21:57:22 +04:00
const string & line = Trim ( features [ i ] ) ;
// NOTE(review): every feature line is echoed to stderr unconditionally
cerr < < " line= " < < line < < endl ;
if ( line . empty ( ) )
continue ;
2012-12-31 20:41:33 +04:00
vector < string > toks = Tokenize ( line ) ;
2013-01-01 21:27:26 +04:00
const string & feature = toks [ 0 ] ;
2013-01-30 15:37:14 +04:00
2013-07-19 01:54:52 +04:00
registry . Construct ( feature , line ) ;
2012-12-31 20:41:33 +04:00
}
2013-06-07 20:32:01 +04:00
// after construction the calls run in this order: OverrideFeatures(),
// LoadFeatureFunctions(), LoadDecodeGraphs(), CheckWeights()
OverrideFeatures ( ) ;
2013-05-31 23:21:02 +04:00
LoadFeatureFunctions ( ) ;
2013-01-29 20:57:56 +04:00
2010-09-14 20:25:33 +04:00
if ( ! LoadDecodeGraphs ( ) ) return false ;
2010-08-10 17:12:00 +04:00
2013-02-07 00:05:00 +04:00
if ( ! CheckWeights ( ) ) {
return false ;
}
2012-12-21 17:34:07 +04:00
//Add any other features here.
2008-06-11 14:52:57 +04:00
2010-10-15 19:19:17 +04:00
//Load extra feature weights
// exactly one weight file may be given; its weights are added onto m_allWeights
vector < string > extraWeightConfig = m_parameter - > GetParam ( " weight-file " ) ;
2011-08-19 20:09:36 +04:00
if ( extraWeightConfig . size ( ) ) {
2012-07-26 20:32:50 +04:00
if ( extraWeightConfig . size ( ) ! = 1 ) {
UserMessage : : Add ( " One argument should be supplied for weight-file " ) ;
return false ;
}
ScoreComponentCollection extraWeights ;
if ( ! extraWeights . Load ( extraWeightConfig [ 0 ] ) ) {
UserMessage : : Add ( " Unable to load weights from " + extraWeightConfig [ 0 ] ) ;
return false ;
}
2013-01-01 21:27:26 +04:00
2012-07-26 20:32:50 +04:00
m_allWeights . PlusEquals ( extraWeights ) ;
2012-02-01 18:05:49 +04:00
}
2013-05-31 15:28:57 +04:00
// alternate weight settings
if ( m_parameter - > GetParam ( " alternate-weight-setting " ) . size ( ) > 0 ) {
2013-06-05 17:06:04 +04:00
if ( ! LoadAlternateWeightSettings ( ) ) {
return false ;
}
2013-05-31 15:28:57 +04:00
}
2011-02-24 16:14:42 +03:00
return true ;
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
void StaticData : : SetBooleanParameter ( bool * parameter , string parameterName , bool defaultValue )
2008-06-11 14:52:57 +04:00
{
// default value if nothing is specified
* parameter = defaultValue ;
2011-02-24 16:14:42 +03:00
if ( ! m_parameter - > isParamSpecified ( parameterName ) ) {
2008-06-11 14:52:57 +04:00
return ;
}
// if parameter is just specified as, e.g. "-parameter" set it true
2011-02-24 16:14:42 +03:00
if ( m_parameter - > GetParam ( parameterName ) . size ( ) = = 0 ) {
2008-06-11 14:52:57 +04:00
* parameter = true ;
}
// if paramter is specified "-parameter true" or "-parameter false"
2011-02-24 16:14:42 +03:00
else if ( m_parameter - > GetParam ( parameterName ) . size ( ) = = 1 ) {
2008-06-11 14:52:57 +04:00
* parameter = Scan < bool > ( m_parameter - > GetParam ( parameterName ) [ 0 ] ) ;
}
}
2013-02-22 00:03:35 +04:00
void StaticData : : SetWeight ( const FeatureFunction * sp , float weight )
2010-10-07 02:06:49 +04:00
{
2011-11-09 21:16:02 +04:00
m_allWeights . Resize ( ) ;
2010-10-07 02:06:49 +04:00
m_allWeights . Assign ( sp , weight ) ;
}
2013-02-22 00:03:35 +04:00
void StaticData : : SetWeights ( const FeatureFunction * sp , const std : : vector < float > & weights )
2010-10-07 02:06:49 +04:00
{
2011-11-09 21:16:02 +04:00
m_allWeights . Resize ( ) ;
2010-10-07 02:06:49 +04:00
m_allWeights . Assign ( sp , weights ) ;
}
2010-04-08 21:16:10 +04:00
void StaticData : : LoadNonTerminals ( )
{
2011-02-24 16:14:42 +03:00
string defaultNonTerminals ;
if ( m_parameter - > GetParam ( " non-terminals " ) . size ( ) = = 0 ) {
defaultNonTerminals = " X " ;
} else {
vector < std : : string > tokens = Tokenize ( m_parameter - > GetParam ( " non-terminals " ) [ 0 ] ) ;
defaultNonTerminals = tokens [ 0 ] ;
}
FactorCollection & factorCollection = FactorCollection : : Instance ( ) ;
m_inputDefaultNonTerminal . SetIsNonTerminal ( true ) ;
const Factor * sourceFactor = factorCollection . AddFactor ( Input , 0 , defaultNonTerminals ) ;
m_inputDefaultNonTerminal . SetFactor ( 0 , sourceFactor ) ;
m_outputDefaultNonTerminal . SetIsNonTerminal ( true ) ;
const Factor * targetFactor = factorCollection . AddFactor ( Output , 0 , defaultNonTerminals ) ;
m_outputDefaultNonTerminal . SetFactor ( 0 , targetFactor ) ;
// for unknwon words
if ( m_parameter - > GetParam ( " unknown-lhs " ) . size ( ) = = 0 ) {
UnknownLHSEntry entry ( defaultNonTerminals , 0.0f ) ;
m_unknownLHS . push_back ( entry ) ;
} else {
const string & filePath = m_parameter - > GetParam ( " unknown-lhs " ) [ 0 ] ;
InputFileStream inStream ( filePath ) ;
string line ;
while ( getline ( inStream , line ) ) {
vector < string > tokens = Tokenize ( line ) ;
2011-11-18 16:07:41 +04:00
CHECK ( tokens . size ( ) = = 2 ) ;
2011-02-24 16:14:42 +03:00
UnknownLHSEntry entry ( tokens [ 0 ] , Scan < float > ( tokens [ 1 ] ) ) ;
m_unknownLHS . push_back ( entry ) ;
}
}
2010-04-08 21:16:10 +04:00
}
2011-02-24 16:14:42 +03:00
2010-04-08 21:16:10 +04:00
void StaticData : : LoadChartDecodingParameters ( )
{
2011-02-24 16:14:42 +03:00
LoadNonTerminals ( ) ;
// source label overlap
if ( m_parameter - > GetParam ( " source-label-overlap " ) . size ( ) > 0 ) {
m_sourceLabelOverlap = ( SourceLabelOverlap ) Scan < int > ( m_parameter - > GetParam ( " source-label-overlap " ) [ 0 ] ) ;
} else {
m_sourceLabelOverlap = SourceLabelOverlapAdd ;
}
m_ruleLimit = ( m_parameter - > GetParam ( " rule-limit " ) . size ( ) > 0 )
? Scan < size_t > ( m_parameter - > GetParam ( " rule-limit " ) [ 0 ] ) : DEFAULT_MAX_TRANS_OPT_SIZE ;
2010-04-08 21:16:10 +04:00
}
2011-02-24 16:14:42 +03:00
bool StaticData : : LoadDecodeGraphs ( )
{
const vector < string > & mappingVector = m_parameter - > GetParam ( " mapping " ) ;
const vector < size_t > & maxChartSpans = Scan < size_t > ( m_parameter - > GetParam ( " max-chart-span " ) ) ;
2013-05-30 14:25:57 +04:00
const std : : vector < FeatureFunction * > * featuresRemaining = & FeatureFunction : : GetFeatureFunctions ( ) ;
2011-02-24 16:14:42 +03:00
DecodeStep * prev = 0 ;
size_t prevDecodeGraphInd = 0 ;
2013-05-30 14:25:57 +04:00
2011-02-24 16:14:42 +03:00
for ( size_t i = 0 ; i < mappingVector . size ( ) ; i + + ) {
vector < string > token = Tokenize ( mappingVector [ i ] ) ;
size_t decodeGraphInd ;
DecodeType decodeType ;
size_t index ;
if ( token . size ( ) = = 2 ) {
decodeGraphInd = 0 ;
decodeType = token [ 0 ] = = " T " ? Translate : Generate ;
index = Scan < size_t > ( token [ 1 ] ) ;
} else if ( token . size ( ) = = 3 ) {
// For specifying multiple translation model
decodeGraphInd = Scan < size_t > ( token [ 0 ] ) ;
//the vectorList index can only increment by one
2011-11-18 16:07:41 +04:00
CHECK ( decodeGraphInd = = prevDecodeGraphInd | | decodeGraphInd = = prevDecodeGraphInd + 1 ) ;
2011-02-24 16:14:42 +03:00
if ( decodeGraphInd > prevDecodeGraphInd ) {
2008-06-11 14:52:57 +04:00
prev = NULL ;
}
2013-05-31 03:46:28 +04:00
if ( prevDecodeGraphInd < decodeGraphInd ) {
featuresRemaining = & FeatureFunction : : GetFeatureFunctions ( ) ;
}
2011-02-24 16:14:42 +03:00
decodeType = token [ 1 ] = = " T " ? Translate : Generate ;
index = Scan < size_t > ( token [ 2 ] ) ;
} else {
UserMessage : : Add ( " Malformed mapping! " ) ;
2011-11-18 16:07:41 +04:00
CHECK ( false ) ;
2011-02-24 16:14:42 +03:00
}
2011-08-18 01:13:21 +04:00
2011-02-24 16:14:42 +03:00
DecodeStep * decodeStep = NULL ;
switch ( decodeType ) {
case Translate :
if ( index > = m_phraseDictionary . size ( ) ) {
stringstream strme ;
strme < < " No phrase dictionary with index "
< < index < < " available! " ;
UserMessage : : Add ( strme . str ( ) ) ;
2011-11-18 16:07:41 +04:00
CHECK ( false ) ;
2011-02-24 16:14:42 +03:00
}
2013-05-30 14:25:57 +04:00
decodeStep = new DecodeStepTranslation ( m_phraseDictionary [ index ] , prev , * featuresRemaining ) ;
2011-02-24 16:14:42 +03:00
break ;
case Generate :
if ( index > = m_generationDictionary . size ( ) ) {
stringstream strme ;
strme < < " No generation dictionary with index "
< < index < < " available! " ;
UserMessage : : Add ( strme . str ( ) ) ;
2011-11-18 16:07:41 +04:00
CHECK ( false ) ;
2011-02-24 16:14:42 +03:00
}
2013-05-30 14:25:57 +04:00
decodeStep = new DecodeStepGeneration ( m_generationDictionary [ index ] , prev , * featuresRemaining ) ;
2011-02-24 16:14:42 +03:00
break ;
case InsertNullFertilityWord :
2011-11-18 16:07:41 +04:00
CHECK ( ! " Please implement NullFertilityInsertion. " ) ;
2011-02-24 16:14:42 +03:00
break ;
}
2013-05-30 14:25:57 +04:00
featuresRemaining = & decodeStep - > GetFeaturesRemaining ( ) ;
2011-11-18 16:07:41 +04:00
CHECK ( decodeStep ) ;
2011-02-24 16:14:42 +03:00
if ( m_decodeGraphs . size ( ) < decodeGraphInd + 1 ) {
DecodeGraph * decodeGraph ;
2012-10-12 17:09:45 +04:00
if ( IsChart ( ) ) {
2011-02-24 16:14:42 +03:00
size_t maxChartSpan = ( decodeGraphInd < maxChartSpans . size ( ) ) ? maxChartSpans [ decodeGraphInd ] : DEFAULT_MAX_CHART_SPAN ;
2013-05-29 21:16:15 +04:00
cerr < < " max-chart-span: " < < maxChartSpans [ decodeGraphInd ] < < endl ;
2011-02-24 16:14:42 +03:00
decodeGraph = new DecodeGraph ( m_decodeGraphs . size ( ) , maxChartSpan ) ;
} else {
decodeGraph = new DecodeGraph ( m_decodeGraphs . size ( ) ) ;
}
m_decodeGraphs . push_back ( decodeGraph ) ; // TODO max chart span
}
m_decodeGraphs [ decodeGraphInd ] - > Add ( decodeStep ) ;
prev = decodeStep ;
prevDecodeGraphInd = decodeGraphInd ;
}
// set maximum n-gram size for backoff approach to decoding paths
// default is always use subsequent paths (value = 0)
for ( size_t i = 0 ; i < m_decodeGraphs . size ( ) ; i + + ) {
m_decodeGraphBackoff . push_back ( 0 ) ;
}
// if specified, record maxmimum unseen n-gram size
const vector < string > & backoffVector = m_parameter - > GetParam ( " decoding-graph-backoff " ) ;
for ( size_t i = 0 ; i < m_decodeGraphs . size ( ) & & i < backoffVector . size ( ) ; i + + ) {
m_decodeGraphBackoff [ i ] = Scan < size_t > ( backoffVector [ i ] ) ;
}
return true ;
2008-06-11 14:52:57 +04:00
}
2010-09-14 13:42:37 +04:00
void StaticData : : ReLoadParameter ( )
{
2012-12-04 21:09:23 +04:00
assert ( false ) ; // TODO completely redo. Too many hardcoded ff
/*
2011-08-19 20:09:36 +04:00
m_verboseLevel = 1 ;
if ( m_parameter - > GetParam ( " verbose " ) . size ( ) = = 1 ) {
m_verboseLevel = Scan < size_t > ( m_parameter - > GetParam ( " verbose " ) [ 0 ] ) ;
}
// check whether "weight-u" is already set
if ( m_parameter - > isParamShortNameSpecified ( " u " ) ) {
if ( m_parameter - > GetParamShortName ( " u " ) . size ( ) < 1 ) {
PARAM_VEC w ( 1 , " 1.0 " ) ;
m_parameter - > OverwriteParamShortName ( " u " , w ) ;
}
}
//loop over all ScoreProducer to update weights
std : : vector < const ScoreProducer * > : : const_iterator iterSP ;
for ( iterSP = transSystem . GetFeatureFunctions ( ) . begin ( ) ; iterSP ! = transSystem . GetFeatureFunctions ( ) . end ( ) ; + + iterSP ) {
std : : string paramShortName = ( * iterSP ) - > GetScoreProducerWeightShortName ( ) ;
vector < float > Weights = Scan < float > ( m_parameter - > GetParamShortName ( paramShortName ) ) ;
if ( paramShortName = = " d " ) { //basic distortion model takes the first weight
if ( ( * iterSP ) - > GetScoreProducerDescription ( ) = = " Distortion " ) {
Weights . resize ( 1 ) ; //take only the first element
} else { //lexicalized reordering model takes the other
Weights . erase ( Weights . begin ( ) ) ; //remove the first element
}
// std::cerr << "this is the Distortion Score Producer -> " << (*iterSP)->GetScoreProducerDescription() << std::cerr;
// std::cerr << "this is the Distortion Score Producer; it has " << (*iterSP)->GetNumScoreComponents() << " weights"<< std::cerr;
// std::cerr << Weights << std::endl;
} else if ( paramShortName = = " tm " ) {
continue ;
}
SetWeights ( * iterSP , Weights ) ;
}
// std::cerr << "There are " << m_phraseDictionary.size() << " m_phraseDictionaryfeatures" << std::endl;
const vector < float > WeightsTM = Scan < float > ( m_parameter - > GetParamShortName ( " tm " ) ) ;
// std::cerr << "WeightsTM: " << WeightsTM << std::endl;
const vector < float > WeightsLM = Scan < float > ( m_parameter - > GetParamShortName ( " lm " ) ) ;
// std::cerr << "WeightsLM: " << WeightsLM << std::endl;
size_t index_WeightTM = 0 ;
for ( size_t i = 0 ; i < transSystem . GetPhraseDictionaries ( ) . size ( ) ; + + i ) {
PhraseDictionaryFeature & phraseDictionaryFeature = * m_phraseDictionary [ i ] ;
// std::cerr << "phraseDictionaryFeature.GetNumScoreComponents():" << phraseDictionaryFeature.GetNumScoreComponents() << std::endl;
// std::cerr << "phraseDictionaryFeature.GetNumInputScores():" << phraseDictionaryFeature.GetNumInputScores() << std::endl;
vector < float > tmp_weights ;
for ( size_t j = 0 ; j < phraseDictionaryFeature . GetNumScoreComponents ( ) ; + + j )
tmp_weights . push_back ( WeightsTM [ index_WeightTM + + ] ) ;
// std::cerr << tmp_weights << std::endl;
SetWeights ( & phraseDictionaryFeature , tmp_weights ) ;
}
2012-12-04 21:09:23 +04:00
*/
2010-09-14 13:42:37 +04:00
}
2010-09-17 18:25:08 +04:00
2011-11-16 13:13:17 +04:00
void StaticData : : ReLoadBleuScoreFeatureParameter ( float weight )
2010-11-24 20:06:54 +03:00
{
2012-12-04 21:09:23 +04:00
assert ( false ) ;
/*
2011-11-16 13:13:17 +04:00
//loop over ScoreProducers to update weights of BleuScoreFeature
2011-08-19 20:09:36 +04:00
std : : vector < const ScoreProducer * > : : const_iterator iterSP ;
for ( iterSP = transSystem . GetFeatureFunctions ( ) . begin ( ) ; iterSP ! = transSystem . GetFeatureFunctions ( ) . end ( ) ; + + iterSP ) {
std : : string paramShortName = ( * iterSP ) - > GetScoreProducerWeightShortName ( ) ;
if ( paramShortName = = " bl " ) {
2011-11-16 13:13:17 +04:00
SetWeight ( * iterSP , weight ) ;
break ;
2011-08-19 20:09:36 +04:00
}
}
2012-12-04 21:09:23 +04:00
*/
2010-11-24 20:06:54 +03:00
}
2010-09-17 18:25:08 +04:00
// ScoreComponentCollection StaticData::GetAllWeightsScoreComponentCollection() const {}
// in ScoreComponentCollection.h
2011-08-19 20:09:36 +04:00
2012-07-31 00:07:19 +04:00
void StaticData : : SetExecPath ( const std : : string & path )
{
2012-10-04 18:08:22 +04:00
/*
namespace fs = boost : : filesystem ;
2013-05-29 21:16:15 +04:00
2012-10-04 18:08:22 +04:00
fs : : path full_path ( fs : : initial_path < fs : : path > ( ) ) ;
2013-05-29 21:16:15 +04:00
2012-10-04 18:08:22 +04:00
full_path = fs : : system_complete ( fs : : path ( path ) ) ;
2013-05-29 21:16:15 +04:00
2012-10-04 18:08:22 +04:00
//Without file name
m_binPath = full_path . parent_path ( ) . string ( ) ;
*/
2013-05-29 21:16:15 +04:00
2012-10-04 18:08:22 +04:00
// NOT TESTED
size_t pos = path . rfind ( " / " ) ;
2013-05-29 21:16:15 +04:00
if ( pos ! = string : : npos ) {
m_binPath = path . substr ( 0 , pos ) ;
2012-10-04 18:08:22 +04:00
}
2012-07-31 00:07:19 +04:00
cerr < < m_binPath < < endl ;
}
/** \return the directory stored in m_binPath (see SetExecPath()). */
const string &StaticData::GetBinDirectory() const
{
  return m_binPath;
}
2013-05-29 21:16:15 +04:00
float StaticData : : GetWeightWordPenalty ( ) const
{
2012-12-19 19:38:57 +04:00
float weightWP = GetWeight ( m_wpProducer ) ;
//VERBOSE(1, "Read weightWP from translation sytem: " << weightWP << std::endl);
return weightWP ;
}
2013-05-29 21:16:15 +04:00
float StaticData : : GetWeightUnknownWordPenalty ( ) const
{
2012-12-19 20:22:10 +04:00
return GetWeight ( m_unknownWordPenaltyProducer ) ;
}
2013-05-29 21:16:15 +04:00
void StaticData : : InitializeForInput ( const InputType & source ) const
{
2013-01-18 22:22:06 +04:00
const std : : vector < FeatureFunction * > & producers = FeatureFunction : : GetFeatureFunctions ( ) ;
2013-05-29 21:16:15 +04:00
for ( size_t i = 0 ; i < producers . size ( ) ; + + i ) {
2013-01-18 22:22:06 +04:00
FeatureFunction & ff = * producers [ i ] ;
2012-12-27 16:41:10 +04:00
ff . InitializeForInput ( source ) ;
2012-12-21 19:59:52 +04:00
}
}
2013-05-29 21:16:15 +04:00
void StaticData : : CleanUpAfterSentenceProcessing ( const InputType & source ) const
{
2013-01-18 22:22:06 +04:00
const std : : vector < FeatureFunction * > & producers = FeatureFunction : : GetFeatureFunctions ( ) ;
2013-05-29 21:16:15 +04:00
for ( size_t i = 0 ; i < producers . size ( ) ; + + i ) {
2013-01-18 22:22:06 +04:00
FeatureFunction & ff = * producers [ i ] ;
2012-12-27 17:10:44 +04:00
ff . CleanUpAfterSentenceProcessing ( source ) ;
2012-12-21 19:59:52 +04:00
}
}
2012-12-25 01:51:11 +04:00
2013-05-31 23:21:02 +04:00
void StaticData : : LoadFeatureFunctions ( )
2013-01-17 21:15:10 +04:00
{
2013-01-18 22:22:06 +04:00
const std : : vector < FeatureFunction * > & ffs = FeatureFunction : : GetFeatureFunctions ( ) ;
std : : vector < FeatureFunction * > : : const_iterator iter ;
2013-01-17 21:15:10 +04:00
for ( iter = ffs . begin ( ) ; iter ! = ffs . end ( ) ; + + iter ) {
2013-05-31 23:21:02 +04:00
FeatureFunction * ff = * iter ;
2013-06-06 18:51:31 +04:00
bool doLoad = true ;
if ( PhraseDictionary * ffCast = dynamic_cast < PhraseDictionary * > ( ff ) ) {
m_phraseDictionary . push_back ( ffCast ) ;
doLoad = false ;
} else if ( const GenerationDictionary * ffCast = dynamic_cast < const GenerationDictionary * > ( ff ) ) {
m_generationDictionary . push_back ( ffCast ) ;
} else if ( WordPenaltyProducer * ffCast = dynamic_cast < WordPenaltyProducer * > ( ff ) ) {
2013-06-05 01:09:21 +04:00
CHECK ( m_wpProducer = = NULL ) ; // max 1 feature;
2013-06-06 18:51:31 +04:00
m_wpProducer = ffCast ;
} else if ( UnknownWordPenaltyProducer * ffCast = dynamic_cast < UnknownWordPenaltyProducer * > ( ff ) ) {
2013-06-05 01:09:21 +04:00
CHECK ( m_unknownWordPenaltyProducer = = NULL ) ; // max 1 feature;
2013-06-06 18:51:31 +04:00
m_unknownWordPenaltyProducer = ffCast ;
} else if ( const InputFeature * ffCast = dynamic_cast < const InputFeature * > ( ff ) ) {
2013-06-05 01:09:21 +04:00
CHECK ( m_inputFeature = = NULL ) ; // max 1 input feature;
2013-06-06 18:51:31 +04:00
m_inputFeature = ffCast ;
2013-06-05 01:09:21 +04:00
}
2013-06-06 18:51:31 +04:00
if ( doLoad ) {
2013-05-31 23:21:02 +04:00
ff - > Load ( ) ;
2013-01-18 21:57:26 +04:00
}
2013-02-13 00:27:14 +04:00
}
2013-01-18 21:57:26 +04:00
2013-02-13 00:27:14 +04:00
for ( size_t i = 0 ; i < m_phraseDictionary . size ( ) ; + + i ) {
2013-06-03 15:33:18 +04:00
PhraseDictionary * pt = m_phraseDictionary [ i ] ;
pt - > Load ( ) ;
2013-01-17 21:15:10 +04:00
}
2013-05-31 23:21:02 +04:00
2013-01-17 21:15:10 +04:00
}
2013-02-07 00:05:00 +04:00
bool StaticData : : CheckWeights ( ) const
{
set < string > weightNames = m_parameter - > GetWeightNames ( ) ;
const std : : vector < FeatureFunction * > & ffs = FeatureFunction : : GetFeatureFunctions ( ) ;
for ( size_t i = 0 ; i < ffs . size ( ) ; + + i ) {
const FeatureFunction & ff = * ffs [ i ] ;
const string & descr = ff . GetScoreProducerDescription ( ) ;
set < string > : : iterator iter = weightNames . find ( descr ) ;
if ( iter = = weightNames . end ( ) ) {
cerr < < " Can't find weights for feature function " < < descr < < endl ;
2013-05-29 21:16:15 +04:00
} else {
2013-02-07 00:05:00 +04:00
weightNames . erase ( iter ) ;
}
}
if ( ! weightNames . empty ( ) ) {
cerr < < " The following weights have no feature function. Maybe incorrectly spelt weights: " ;
set < string > : : iterator iter ;
for ( iter = weightNames . begin ( ) ; iter ! = weightNames . end ( ) ; + + iter ) {
cerr < < * iter < < " , " ;
}
return false ;
}
return true ;
}
2013-06-05 17:06:04 +04:00
/**! Read in settings for alternative weights */
bool StaticData : : LoadAlternateWeightSettings ( )
2013-06-03 15:33:18 +04:00
{
2013-06-05 17:06:04 +04:00
if ( m_threadCount > 1 ) {
cerr < < " ERROR: alternative weight settings currently not supported with multi-threading. " ;
return false ;
}
2013-05-31 15:28:57 +04:00
const vector < string > & weightSpecification = m_parameter - > GetParam ( " alternate-weight-setting " ) ;
2013-06-03 15:33:18 +04:00
2013-05-31 15:28:57 +04:00
// get mapping from feature names to feature functions
map < string , FeatureFunction * > nameToFF ;
const std : : vector < FeatureFunction * > & ffs = FeatureFunction : : GetFeatureFunctions ( ) ;
for ( size_t i = 0 ; i < ffs . size ( ) ; + + i ) {
nameToFF [ ffs [ i ] - > GetScoreProducerDescription ( ) ] = ffs [ i ] ;
}
// copy main weight setting as default
m_weightSetting [ " default " ] = new ScoreComponentCollection ( m_allWeights ) ;
// go through specification in config file
string currentId = " " ;
bool hasErrors = false ;
for ( size_t i = 0 ; i < weightSpecification . size ( ) ; + + i ) {
// identifier line (with optional additional specifications)
if ( weightSpecification [ i ] . find ( " id= " ) = = 0 ) {
vector < string > tokens = Tokenize ( weightSpecification [ i ] ) ;
vector < string > args = Tokenize ( tokens [ 0 ] , " = " ) ;
currentId = args [ 1 ] ;
2013-06-06 18:51:31 +04:00
cerr < < " alternate weight setting " < < currentId < < endl ;
2013-05-31 15:28:57 +04:00
CHECK ( m_weightSetting . find ( currentId ) = = m_weightSetting . end ( ) ) ;
m_weightSetting [ currentId ] = new ScoreComponentCollection ;
// other specifications
for ( size_t j = 1 ; j < tokens . size ( ) ; j + + ) {
vector < string > args = Tokenize ( tokens [ j ] , " = " ) ;
2013-06-06 18:51:31 +04:00
// TODO: support for sparse weights
if ( args [ 0 ] = = " weight-file " ) {
cerr < < " ERROR: sparse weight files currently not supported " ;
}
// ignore feature functions
else if ( args [ 0 ] = = " ignore-ff " ) {
set < string > * ffNameSet = new set < string > ;
m_weightSettingIgnoreFF [ currentId ] = * ffNameSet ;
2013-08-07 18:35:40 +04:00
vector < string > featureFunctionName = Tokenize ( args [ 1 ] , " , " ) ;
2013-06-06 18:51:31 +04:00
for ( size_t k = 0 ; k < featureFunctionName . size ( ) ; k + + ) {
// check if a valid nane
map < string , FeatureFunction * > : : iterator ffLookUp = nameToFF . find ( featureFunctionName [ k ] ) ;
if ( ffLookUp = = nameToFF . end ( ) ) {
cerr < < " ERROR: alternate weight setting " < < currentId < < " specifies to ignore feature function " < < featureFunctionName [ k ] < < " but there is no such feature function " < < endl ;
hasErrors = true ;
} else {
m_weightSettingIgnoreFF [ currentId ] . insert ( featureFunctionName [ k ] ) ;
}
}
2013-05-31 15:28:57 +04:00
}
}
}
2013-06-03 15:33:18 +04:00
// weight lines
2013-05-31 15:28:57 +04:00
else {
CHECK ( currentId ! = " " ) ;
vector < string > tokens = Tokenize ( weightSpecification [ i ] ) ;
CHECK ( tokens . size ( ) > = 2 ) ;
// get name and weight values
string name = tokens [ 0 ] ;
name = name . substr ( 0 , name . size ( ) - 1 ) ; // remove trailing "="
vector < float > weights ( tokens . size ( ) - 1 ) ;
for ( size_t i = 1 ; i < tokens . size ( ) ; + + i ) {
float weight = Scan < float > ( tokens [ i ] ) ;
weights [ i - 1 ] = weight ;
}
2012-12-19 20:51:55 +04:00
2013-05-31 15:28:57 +04:00
// check if a valid nane
map < string , FeatureFunction * > : : iterator ffLookUp = nameToFF . find ( name ) ;
if ( ffLookUp = = nameToFF . end ( ) ) {
2013-06-03 15:33:18 +04:00
cerr < < " ERROR: alternate weight setting " < < currentId < < " specifies weight(s) for " < < name < < " but there is no such feature function " < < endl ;
hasErrors = true ;
} else {
m_weightSetting [ currentId ] - > Assign ( nameToFF [ name ] , weights ) ;
2013-05-31 15:28:57 +04:00
}
}
}
CHECK ( ! hasErrors ) ;
2013-06-05 17:06:04 +04:00
return true ;
2013-05-31 15:28:57 +04:00
}
2013-06-07 20:32:01 +04:00
void StaticData : : OverrideFeatures ( )
{
const PARAM_VEC & params = m_parameter - > GetParam ( " feature-overwrite " ) ;
for ( size_t i = 0 ; i < params . size ( ) ; + + i ) {
2013-06-10 21:11:55 +04:00
const string & str = params [ i ] ;
vector < string > toks = Tokenize ( str ) ;
CHECK ( toks . size ( ) > 1 ) ;
2013-06-07 20:32:01 +04:00
2013-06-10 21:11:55 +04:00
FeatureFunction & ff = FeatureFunction : : FindFeatureFunction ( toks [ 0 ] ) ;
2013-06-07 20:32:01 +04:00
2013-06-10 21:11:55 +04:00
for ( size_t j = 1 ; j < toks . size ( ) ; + + j ) {
const string & keyValStr = toks [ j ] ;
vector < string > keyVal = Tokenize ( keyValStr , " = " ) ;
CHECK ( keyVal . size ( ) = = 2 ) ;
2013-07-11 23:41:42 +04:00
VERBOSE ( 1 , " Override " < < ff . GetScoreProducerDescription ( ) < < " "
2013-07-18 20:00:44 +04:00
< < keyVal [ 0 ] < < " = " < < keyVal [ 1 ] < < endl ) ;
2013-07-11 23:41:42 +04:00
2013-06-11 03:05:12 +04:00
ff . SetParameter ( keyVal [ 0 ] , keyVal [ 1 ] ) ;
2013-06-07 20:32:01 +04:00
2013-06-10 21:11:55 +04:00
}
2013-06-07 20:32:01 +04:00
}
}
2013-05-31 15:28:57 +04:00
} // namespace
2008-10-09 03:51:26 +04:00