/***********************************************************************
 Moses - factored phrase-based language decoder
 Copyright (C) 2010 University of Edinburgh

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
2010-09-15 18:36:07 +04:00
2010-09-28 19:13:50 +04:00
# include <algorithm>
2010-09-15 19:38:46 +04:00
# include <cstdlib>
# include <ctime>
2010-09-15 18:36:07 +04:00
# include <string>
# include <vector>
2011-05-31 19:39:48 +04:00
# include <map>
2010-09-15 18:36:07 +04:00
# include <boost/program_options.hpp>
2011-02-24 13:54:16 +03:00
# include <boost/algorithm/string.hpp>
2010-09-28 19:13:50 +04:00
# ifdef MPI_ENABLE
# include <boost/mpi.hpp>
namespace mpi = boost : : mpi ;
# endif
2010-09-15 18:36:07 +04:00
2011-06-29 20:18:55 +04:00
# include "Main.h"
2010-09-15 19:38:46 +04:00
# include "FeatureVector.h"
2010-09-15 18:36:07 +04:00
# include "StaticData.h"
2010-09-16 20:23:52 +04:00
# include "ChartTrellisPathList.h"
2010-09-17 11:35:31 +04:00
# include "ChartTrellisPath.h"
# include "ScoreComponentCollection.h"
2010-09-15 18:36:07 +04:00
# include "Decoder.h"
2010-09-15 19:38:46 +04:00
# include "Optimiser.h"
2011-04-22 23:17:33 +04:00
# include "Hildreth.h"
2011-09-26 21:31:59 +04:00
# include "ThreadPool.h"
2010-09-15 18:36:07 +04:00
using namespace Mira ;
using namespace std ;
using namespace Moses ;
namespace po = boost : : program_options ;
int main ( int argc , char * * argv ) {
2011-03-23 15:13:38 +03:00
size_t rank = 0 ;
size_t size = 1 ;
2010-09-28 19:13:50 +04:00
# ifdef MPI_ENABLE
2011-03-23 15:13:38 +03:00
mpi : : environment env ( argc , argv ) ;
mpi : : communicator world ;
rank = world . rank ( ) ;
size = world . size ( ) ;
2010-09-28 19:13:50 +04:00
# endif
2011-03-23 15:13:38 +03:00
cerr < < " Rank: " < < rank < < " Size: " < < size < < endl ;
bool help ;
int verbosity ;
string mosesConfigFile ;
string inputFile ;
vector < string > referenceFiles ;
2011-05-31 19:39:48 +04:00
string coreWeightFile ;
2011-03-23 15:13:38 +03:00
size_t epochs ;
string learner ;
bool shuffle ;
size_t mixingFrequency ;
size_t weightDumpFrequency ;
string weightDumpStem ;
float min_learning_rate ;
2011-06-03 17:04:59 +04:00
size_t scale_margin ;
2011-06-30 16:00:36 +04:00
size_t scale_update ;
2011-03-23 15:13:38 +03:00
size_t n ;
size_t batchSize ;
bool distinctNbest ;
bool onlyViolatedConstraints ;
bool accumulateWeights ;
float historySmoothing ;
bool scaleByInputLength ;
2011-10-26 14:16:45 +04:00
bool scaleByReferenceLength ;
2011-11-25 19:54:32 +04:00
bool scaleByTargetLengthLinear ;
bool scaleByTargetLengthTrend ;
2011-10-26 14:36:00 +04:00
bool scaleByAvgLength ;
2011-10-24 21:39:23 +04:00
float scaleByX ;
2012-02-20 21:27:35 +04:00
float slack , dummy ;
2011-03-23 15:13:38 +03:00
float slack_step ;
2011-05-09 13:39:57 +04:00
float slack_min ;
2011-04-15 15:34:51 +04:00
bool averageWeights ;
2011-03-23 15:13:38 +03:00
bool weightConvergence ;
float learning_rate ;
2011-05-31 22:34:27 +04:00
float mira_learning_rate ;
float perceptron_learning_rate ;
2011-03-23 15:13:38 +03:00
bool logFeatureValues ;
size_t baseOfLog ;
string decoder_settings ;
float min_weight_change ;
float decrease_learning_rate ;
bool normaliseWeights ;
bool print_feature_values ;
2011-04-26 23:35:06 +04:00
bool historyOf1best ;
2011-06-27 21:48:49 +04:00
bool historyOfOracles ;
2011-04-26 23:35:06 +04:00
bool sentenceLevelBleu ;
2012-02-04 00:20:25 +04:00
float bleuScoreWeight , bleuScoreWeight_hope , bleuScoreWeight_fear ;
2011-06-26 23:12:46 +04:00
float margin_slack ;
float margin_slack_incr ;
2011-05-10 21:17:19 +04:00
bool perceptron_update ;
2012-02-21 00:14:19 +04:00
bool hope_fear , hope_fear_rank ;
bool model_hope_fear , rank_only ;
int hope_n , fear_n , rank_n ;
2011-09-26 21:31:59 +04:00
int threadcount ;
2011-06-27 21:15:03 +04:00
size_t adapt_after_epoch ;
2011-10-23 01:23:58 +04:00
size_t bleu_smoothing_scheme ;
2012-01-16 20:08:56 +04:00
float max_length_dev_all ;
2011-10-27 19:37:10 +04:00
float max_length_dev_hypos ;
2012-01-16 20:08:56 +04:00
float max_length_dev_hope_ref ;
float max_length_dev_fear_ref ;
2011-11-16 22:06:14 +04:00
float relax_BP ;
2012-01-14 19:56:16 +04:00
bool delayUpdates ;
2012-01-16 20:11:02 +04:00
float min_oracle_bleu ;
2012-02-04 00:20:25 +04:00
float minBleuRatio , maxBleuRatio ;
2012-02-20 19:54:55 +04:00
bool boost ;
2011-03-23 15:13:38 +03:00
po : : options_description desc ( " Allowed options " ) ;
2011-04-26 23:35:06 +04:00
desc . add_options ( )
2012-02-20 19:54:55 +04:00
( " slack " , po : : value < float > ( & slack ) - > default_value ( 0.01 ) , " Use slack in optimiser " )
2012-02-20 21:27:35 +04:00
( " dummy " , po : : value < float > ( & dummy ) - > default_value ( - 1 ) , " Dummy variable for slack " )
2011-06-10 18:14:40 +04:00
( " accumulate-weights " , po : : value < bool > ( & accumulateWeights ) - > default_value ( false ) , " Accumulate and average weights over all epochs " )
2011-06-27 21:15:03 +04:00
( " adapt-after-epoch " , po : : value < size_t > ( & adapt_after_epoch ) - > default_value ( 0 ) , " Index of epoch after which adaptive parameters will be adapted " )
2011-06-10 18:14:40 +04:00
( " average-weights " , po : : value < bool > ( & averageWeights ) - > default_value ( false ) , " Set decoder weights to average weights after each update " )
2011-11-24 20:35:29 +04:00
( " base-of-log " , po : : value < size_t > ( & baseOfLog ) - > default_value ( 10 ) , " Base for taking logs of feature values " )
2011-06-10 18:14:40 +04:00
( " batch-size,b " , po : : value < size_t > ( & batchSize ) - > default_value ( 1 ) , " Size of batch that is send to optimiser for weight adjustments " )
2011-10-25 18:17:00 +04:00
( " bleu-score-weight " , po : : value < float > ( & bleuScoreWeight ) - > default_value ( 1.0 ) , " Bleu score weight used in the decoder objective function (on top of the Bleu objective weight) " )
2011-10-20 18:02:21 +04:00
( " bleu-score-weight-hope " , po : : value < float > ( & bleuScoreWeight_hope ) - > default_value ( - 1 ) , " Bleu score weight used in the decoder objective function for hope translations " )
2012-02-20 19:54:55 +04:00
( " bleu-score-weight-fear " , po : : value < float > ( & bleuScoreWeight_fear ) - > default_value ( - 1 ) , " Bleu score weight used in the decoder objective function for fear translations " )
2011-11-16 22:06:14 +04:00
( " bleu-smoothing-scheme " , po : : value < size_t > ( & bleu_smoothing_scheme ) - > default_value ( 1 ) , " Set a smoothing scheme for sentence-Bleu: +1 (1), +0.1 (2), papineni (3) (default:1) " )
2012-02-20 19:54:55 +04:00
( " boost " , po : : value < bool > ( & boost ) - > default_value ( false ) , " Apply boosting factor to updates on misranked candidates " )
2011-10-25 18:17:00 +04:00
( " config,f " , po : : value < string > ( & mosesConfigFile ) , " Moses ini-file " )
2011-06-10 18:14:40 +04:00
( " core-weights " , po : : value < string > ( & coreWeightFile ) , " Weight file containing the core weights (already tuned, have to be non-zero) " )
( " decoder-settings " , po : : value < string > ( & decoder_settings ) - > default_value ( " " ) , " Decoder settings for tuning runs " )
( " decr-learning-rate " , po : : value < float > ( & decrease_learning_rate ) - > default_value ( 0 ) , " Decrease learning rate by the given value after every epoch " )
2012-01-14 19:56:16 +04:00
( " delay-updates " , po : : value < bool > ( & delayUpdates ) - > default_value ( false ) , " Delay all updates until the end of an epoch " )
2011-10-25 18:17:00 +04:00
( " distinct-nbest " , po : : value < bool > ( & distinctNbest ) - > default_value ( true ) , " Use n-best list with distinct translations in inference step " )
2011-06-26 23:12:46 +04:00
( " epochs,e " , po : : value < size_t > ( & epochs ) - > default_value ( 10 ) , " Number of epochs " )
2011-06-10 18:14:40 +04:00
( " fear-n " , po : : value < int > ( & fear_n ) - > default_value ( - 1 ) , " Number of fear translations used " )
( " help " , po : : value ( & help ) - > zero_tokens ( ) - > default_value ( false ) , " Print this help message and exit " )
2011-06-27 21:48:49 +04:00
( " history-of-1best " , po : : value < bool > ( & historyOf1best ) - > default_value ( false ) , " Use 1best translations to update the history " )
( " history-of-oracles " , po : : value < bool > ( & historyOfOracles ) - > default_value ( false ) , " Use oracle translations to update the history " )
2011-06-26 23:12:46 +04:00
( " history-smoothing " , po : : value < float > ( & historySmoothing ) - > default_value ( 0.7 ) , " Adjust the factor for history smoothing " )
2011-10-25 18:17:00 +04:00
( " hope-fear " , po : : value < bool > ( & hope_fear ) - > default_value ( true ) , " Use only hope and fear translations for optimisation (not model) " )
2012-02-21 00:14:19 +04:00
( " hope-fear-rank " , po : : value < bool > ( & hope_fear_rank ) - > default_value ( false ) , " Use hope and fear translations for optimisation, use model for ranking " )
2011-06-10 18:14:40 +04:00
( " hope-n " , po : : value < int > ( & hope_n ) - > default_value ( - 1 ) , " Number of hope translations used " )
( " input-file,i " , po : : value < string > ( & inputFile ) , " Input file containing tokenised source " )
( " learner,l " , po : : value < string > ( & learner ) - > default_value ( " mira " ) , " Learning algorithm " )
( " log-feature-values " , po : : value < bool > ( & logFeatureValues ) - > default_value ( false ) , " Take log of feature values according to the given base. " )
2011-06-28 15:35:59 +04:00
( " margin-incr " , po : : value < float > ( & margin_slack_incr ) - > default_value ( 0 ) , " Increment margin slack after every epoch by this amount " )
( " margin-slack " , po : : value < float > ( & margin_slack ) - > default_value ( 0 ) , " Slack when comparing left and right hand side of constraints " )
2012-01-16 20:08:56 +04:00
( " max-length-dev-all " , po : : value < float > ( & max_length_dev_all ) - > default_value ( - 1 ) , " Make use of all 3 following options " )
( " max-length-dev-hypos " , po : : value < float > ( & max_length_dev_hypos ) - > default_value ( - 1 ) , " Number between 0 and 1 specifying the percentage of admissible length deviation between hope and fear translations " )
( " max-length-dev-hope-ref " , po : : value < float > ( & max_length_dev_hope_ref ) - > default_value ( - 1 ) , " Number between 0 and 1 specifying the percentage of admissible length deviation between hope and reference translations " )
( " max-length-dev-fear-ref " , po : : value < float > ( & max_length_dev_fear_ref ) - > default_value ( - 1 ) , " Number between 0 and 1 specifying the percentage of admissible length deviation between fear and reference translations " )
2012-02-21 00:14:19 +04:00
( " min-bleu-ratio " , po : : value < float > ( & minBleuRatio ) - > default_value ( - 1 ) , " Set a minimum BLEU ratio between hope and fear " )
( " max-bleu-ratio " , po : : value < float > ( & maxBleuRatio ) - > default_value ( - 1 ) , " Set a maximum BLEU ratio between hope and fear " )
( " min-learning-rate " , po : : value < float > ( & min_learning_rate ) - > default_value ( 0 ) , " Set a minimum learning rate " )
2012-01-16 20:11:02 +04:00
( " min-oracle-bleu " , po : : value < float > ( & min_oracle_bleu ) - > default_value ( 0 ) , " Set a minimum oracle BLEU score " )
2012-02-21 00:14:19 +04:00
( " min-weight-change " , po : : value < float > ( & min_weight_change ) - > default_value ( 0.01 ) , " Set minimum weight change for stopping criterion " )
2011-06-28 15:35:59 +04:00
( " mira-learning-rate " , po : : value < float > ( & mira_learning_rate ) - > default_value ( 1 ) , " Learning rate for MIRA (fixed or flexible) " )
2011-06-26 23:12:46 +04:00
( " mixing-frequency " , po : : value < size_t > ( & mixingFrequency ) - > default_value ( 5 ) , " How often per epoch to mix weights, when using mpi " )
2011-10-25 18:17:00 +04:00
( " model-hope-fear " , po : : value < bool > ( & model_hope_fear ) - > default_value ( false ) , " Use model, hope and fear translations for optimisation " )
( " nbest,n " , po : : value < size_t > ( & n ) - > default_value ( 1 ) , " Number of translations in n-best list " )
2011-06-10 18:14:40 +04:00
( " normalise " , po : : value < bool > ( & normaliseWeights ) - > default_value ( false ) , " Whether to normalise the updated weights before passing them to the decoder " )
( " only-violated-constraints " , po : : value < bool > ( & onlyViolatedConstraints ) - > default_value ( false ) , " Add only violated constraints to the optimisation problem " )
( " perceptron-learning-rate " , po : : value < float > ( & perceptron_learning_rate ) - > default_value ( 0.01 ) , " Perceptron learning rate " )
( " print-feature-values " , po : : value < bool > ( & print_feature_values ) - > default_value ( false ) , " Print out feature values " )
2012-02-21 00:14:19 +04:00
( " rank-n " , po : : value < int > ( & rank_n ) - > default_value ( - 1 ) , " Number of translations used for ranking " )
( " rank-only " , po : : value < bool > ( & rank_only ) - > default_value ( false ) , " Use only model translations for optimisation " )
2011-06-10 18:14:40 +04:00
( " reference-files,r " , po : : value < vector < string > > ( & referenceFiles ) , " Reference translation files for training " )
2011-11-16 22:06:14 +04:00
( " relax-BP " , po : : value < float > ( & relax_BP ) - > default_value ( 1 ) , " Relax the BP by setting this value between 0 and 1 " )
2011-10-24 13:43:53 +04:00
( " scale-by-input-length " , po : : value < bool > ( & scaleByInputLength ) - > default_value ( true ) , " Scale the BLEU score by (a history of) the input length " )
2011-10-26 14:36:00 +04:00
( " scale-by-reference-length " , po : : value < bool > ( & scaleByReferenceLength ) - > default_value ( false ) , " Scale BLEU by (a history of) the reference length " )
2011-11-25 19:54:32 +04:00
( " scale-by-target-length-linear " , po : : value < bool > ( & scaleByTargetLengthLinear ) - > default_value ( false ) , " Scale BLEU by (a history of) the target length (linear future estimate) " )
( " scale-by-target-length-trend " , po : : value < bool > ( & scaleByTargetLengthTrend ) - > default_value ( false ) , " Scale BLEU by (a history of) the target length (trend-based future estimate) " )
2011-10-26 14:36:00 +04:00
( " scale-by-avg-length " , po : : value < bool > ( & scaleByAvgLength ) - > default_value ( false ) , " Scale BLEU by (a history of) the average of input and reference length " )
2011-10-24 21:39:23 +04:00
( " scale-by-x " , po : : value < float > ( & scaleByX ) - > default_value ( 1 ) , " Scale the BLEU score by value x " )
2011-06-28 15:35:59 +04:00
( " scale-margin " , po : : value < size_t > ( & scale_margin ) - > default_value ( 0 ) , " Scale the margin by the Bleu score of the oracle translation " )
2012-02-04 00:20:25 +04:00
( " scale-update " , po : : value < size_t > ( & scale_update ) - > default_value ( 0 ) , " Scale the update by the Bleu score of the oracle translation " )
2012-02-21 00:14:19 +04:00
( " sentence-level-bleu " , po : : value < bool > ( & sentenceLevelBleu ) - > default_value ( true ) , " Use a sentences level Bleu scoring function " )
2011-06-10 18:14:40 +04:00
( " shuffle " , po : : value < bool > ( & shuffle ) - > default_value ( false ) , " Shuffle input sentences before processing " )
2011-06-30 16:00:36 +04:00
( " slack-min " , po : : value < float > ( & slack_min ) - > default_value ( 0.01 ) , " Minimum slack used " )
( " slack-step " , po : : value < float > ( & slack_step ) - > default_value ( 0 ) , " Increase slack from epoch to epoch by the value provided " )
( " stop-weights " , po : : value < bool > ( & weightConvergence ) - > default_value ( true ) , " Stop when weights converge " )
2011-09-26 21:31:59 +04:00
( " threads " , po : : value < int > ( & threadcount ) - > default_value ( 1 ) , " Number of threads used " )
2011-06-30 16:00:36 +04:00
( " verbosity,v " , po : : value < int > ( & verbosity ) - > default_value ( 0 ) , " Verbosity level " )
2012-02-21 00:14:19 +04:00
( " weight-dump-frequency " , po : : value < size_t > ( & weightDumpFrequency ) - > default_value ( 1 ) , " How often per epoch to dump weights, when using mpi " )
2011-06-30 16:00:36 +04:00
( " weight-dump-stem " , po : : value < string > ( & weightDumpStem ) - > default_value ( " weights " ) , " Stem of filename to use for dumping weights " ) ;
2011-03-23 15:13:38 +03:00
po : : options_description cmdline_options ;
cmdline_options . add ( desc ) ;
po : : variables_map vm ;
2011-06-26 23:12:46 +04:00
po : : store ( po : : command_line_parser ( argc , argv ) . options ( cmdline_options ) . run ( ) , vm ) ;
2011-03-23 15:13:38 +03:00
po : : notify ( vm ) ;
if ( help ) {
std : : cout < < " Usage: " + string ( argv [ 0 ] )
+ " -f mosesini-file -i input-file -r reference-file(s) [options] "
< < std : : endl ;
std : : cout < < desc < < std : : endl ;
return 0 ;
}
2011-09-26 21:31:59 +04:00
// create threadpool, if using multi-threaded decoding
// note: multi-threading is done on sentence-level,
// each thread translates one sentence
# ifdef WITH_THREADS
if ( threadcount < 1 ) {
cerr < < " Error: Need to specify a positive number of threads " < < endl ;
exit ( 1 ) ;
}
ThreadPool pool ( threadcount ) ;
# else
if ( threadcount > 1 ) {
cerr < < " Error: Thread count of " < < threadcount < < " but moses not built with thread support " < < endl ;
exit ( 1 ) ;
}
# endif
2012-02-20 21:27:35 +04:00
if ( dummy ! = - 1 )
slack = dummy ;
2011-03-23 15:13:38 +03:00
if ( mosesConfigFile . empty ( ) ) {
cerr < < " Error: No moses ini file specified " < < endl ;
return 1 ;
}
if ( inputFile . empty ( ) ) {
cerr < < " Error: No input file specified " < < endl ;
return 1 ;
}
if ( ! referenceFiles . size ( ) ) {
cerr < < " Error: No reference files specified " < < endl ;
return 1 ;
}
// load input and references
vector < string > inputSentences ;
if ( ! loadSentences ( inputFile , inputSentences ) ) {
cerr < < " Error: Failed to load input sentences from " < < inputFile < < endl ;
return 1 ;
}
vector < vector < string > > referenceSentences ( referenceFiles . size ( ) ) ;
for ( size_t i = 0 ; i < referenceFiles . size ( ) ; + + i ) {
if ( ! loadSentences ( referenceFiles [ i ] , referenceSentences [ i ] ) ) {
cerr < < " Error: Failed to load reference sentences from "
< < referenceFiles [ i ] < < endl ;
return 1 ;
}
if ( referenceSentences [ i ] . size ( ) ! = inputSentences . size ( ) ) {
cerr < < " Error: Input file length ( " < < inputSentences . size ( ) < < " ) != ( "
< < referenceSentences [ i ] . size ( ) < < " ) length of reference file " < < i
< < endl ;
return 1 ;
}
}
2011-11-25 19:54:32 +04:00
if ( scaleByReferenceLength | | scaleByTargetLengthLinear | | scaleByTargetLengthTrend | | scaleByAvgLength )
2011-10-26 14:36:00 +04:00
scaleByInputLength = false ;
2011-10-24 13:43:53 +04:00
2011-03-23 15:13:38 +03:00
// initialise Moses
2011-11-16 13:13:17 +04:00
// add initial Bleu weight and references to initialize Bleu feature
2011-11-16 16:25:26 +04:00
decoder_settings + = " -weight-bl 1 -references " ;
2011-11-16 13:13:17 +04:00
for ( size_t i = 0 ; i < referenceFiles . size ( ) ; + + i ) {
decoder_settings + = " " ;
decoder_settings + = referenceFiles [ i ] ;
}
2011-03-23 15:13:38 +03:00
vector < string > decoder_params ;
boost : : split ( decoder_params , decoder_settings , boost : : is_any_of ( " \t " ) ) ;
2011-11-16 13:13:17 +04:00
MosesDecoder * decoder = new MosesDecoder ( mosesConfigFile , verbosity , decoder_params . size ( ) , decoder_params ) ;
2011-11-25 19:54:32 +04:00
decoder - > setBleuParameters ( scaleByInputLength , scaleByReferenceLength , scaleByAvgLength ,
scaleByTargetLengthLinear , scaleByTargetLengthTrend ,
2011-11-16 22:06:14 +04:00
scaleByX , historySmoothing , bleu_smoothing_scheme , relax_BP ) ;
2011-03-23 15:13:38 +03:00
if ( normaliseWeights ) {
ScoreComponentCollection startWeights = decoder - > getWeights ( ) ;
startWeights . L1Normalise ( ) ;
decoder - > setWeights ( startWeights ) ;
}
2011-05-31 22:34:27 +04:00
// Optionally shuffle the sentences
vector < size_t > order ;
if ( rank = = 0 ) {
for ( size_t i = 0 ; i < inputSentences . size ( ) ; + + i ) {
order . push_back ( i ) ;
}
if ( shuffle ) {
cerr < < " Shuffling input sentences.. " < < endl ;
RandomIndex rindex ;
random_shuffle ( order . begin ( ) , order . end ( ) , rindex ) ;
}
}
// initialise optimizer
Optimiser * optimiser = NULL ;
if ( learner = = " mira " ) {
2011-06-26 23:12:46 +04:00
if ( rank = = 0 ) {
cerr < < " Optimising using Mira " < < endl ;
2012-02-19 23:56:16 +04:00
cerr < < " slack: " < < slack < < " , learning rate: " < < mira_learning_rate < < endl ;
2011-06-26 23:12:46 +04:00
}
2012-02-20 19:54:55 +04:00
optimiser = new MiraOptimiser ( onlyViolatedConstraints , slack , scale_margin , scale_update , margin_slack , boost ) ;
2011-05-31 22:34:27 +04:00
learning_rate = mira_learning_rate ;
perceptron_update = false ;
} else if ( learner = = " perceptron " ) {
2011-06-26 23:12:46 +04:00
if ( rank = = 0 ) {
cerr < < " Optimising using Perceptron " < < endl ;
}
2011-05-31 22:34:27 +04:00
optimiser = new Perceptron ( ) ;
learning_rate = perceptron_learning_rate ;
perceptron_update = true ;
model_hope_fear = false ; // mira only
2012-02-21 00:14:19 +04:00
rank_only = false ; // mira only
2011-05-31 22:34:27 +04:00
hope_fear = false ; // mira only
2012-02-21 00:14:19 +04:00
hope_fear_rank = false ; // mira only
2011-06-29 20:44:28 +04:00
n = 1 ;
hope_n = 1 ;
fear_n = 1 ;
2011-05-31 22:34:27 +04:00
} else {
cerr < < " Error: Unknown optimiser: " < < learner < < endl ;
return 1 ;
}
// resolve parameter dependencies
2011-06-28 15:35:59 +04:00
if ( batchSize > 1 & & perceptron_update ) {
2011-05-09 13:39:57 +04:00
batchSize = 1 ;
2011-06-28 15:35:59 +04:00
cerr < < " Info: Setting batch size to 1 for perceptron update " < < endl ;
2011-04-26 23:35:06 +04:00
}
2012-02-20 19:54:55 +04:00
if ( hope_n = = - 1 )
2011-05-31 22:34:27 +04:00
hope_n = n ;
2012-02-20 19:54:55 +04:00
if ( fear_n = = - 1 )
2011-05-31 22:34:27 +04:00
fear_n = n ;
2012-02-21 00:14:19 +04:00
if ( rank_n = = - 1 )
rank_n = n ;
2012-02-20 19:54:55 +04:00
2011-06-28 15:35:59 +04:00
if ( model_hope_fear & & hope_fear ) {
2011-05-31 22:34:27 +04:00
hope_fear = false ; // is true by default
}
2012-02-21 00:14:19 +04:00
if ( rank_only & & hope_fear ) {
hope_fear = false ; // is true by default
}
if ( hope_fear_rank & & hope_fear ) {
hope_fear = false ; // is true by default
}
if ( learner = = " mira " & & ! ( hope_fear | | model_hope_fear | | rank_only | | hope_fear_rank ) ) {
2011-06-29 20:44:28 +04:00
cerr < < " Error: Need to select an one of parameters --hope-fear/--model-hope-fear for mira update. " < < endl ;
return 1 ;
2011-06-26 23:12:46 +04:00
}
2012-01-16 15:22:58 +04:00
if ( historyOf1best | | historyOfOracles )
sentenceLevelBleu = false ;
2011-06-27 21:48:49 +04:00
if ( ! sentenceLevelBleu ) {
if ( ! historyOf1best & & ! historyOfOracles ) {
historyOf1best = true ;
}
}
2011-10-20 18:02:21 +04:00
if ( bleuScoreWeight_hope = = - 1 ) {
bleuScoreWeight_hope = bleuScoreWeight ;
}
2012-02-04 00:20:25 +04:00
if ( bleuScoreWeight_fear = = - 1 ) {
bleuScoreWeight_fear = bleuScoreWeight ;
}
2011-05-31 22:34:27 +04:00
2012-01-16 20:08:56 +04:00
if ( max_length_dev_all ! = - 1 ) {
max_length_dev_hypos = max_length_dev_all ;
max_length_dev_hope_ref = max_length_dev_all ;
max_length_dev_fear_ref = max_length_dev_all ;
2011-10-27 19:37:10 +04:00
}
2010-09-28 19:13:50 +04:00
# ifdef MPI_ENABLE
2011-03-23 15:13:38 +03:00
mpi : : broadcast ( world , order , 0 ) ;
2010-09-28 19:13:50 +04:00
# endif
2011-06-10 18:14:40 +04:00
// Create shards according to the number of processes used
2011-03-23 15:13:38 +03:00
vector < size_t > shard ;
float shardSize = ( float ) ( order . size ( ) ) / size ;
VERBOSE ( 1 , " Shard size: " < < shardSize < < endl ) ;
size_t shardStart = ( size_t ) ( shardSize * rank ) ;
size_t shardEnd = ( size_t ) ( shardSize * ( rank + 1 ) ) ;
if ( rank = = size - 1 )
shardEnd = order . size ( ) ;
VERBOSE ( 1 , " Rank: " < < rank < < " Shard start: " < < shardStart < < " Shard end: " < < shardEnd < < endl ) ;
shard . resize ( shardSize ) ;
copy ( order . begin ( ) + shardStart , order . begin ( ) + shardEnd , shard . begin ( ) ) ;
2011-11-16 13:13:17 +04:00
// get reference to feature functions
2011-05-31 19:39:48 +04:00
const vector < const ScoreProducer * > featureFunctions =
2011-11-16 13:13:17 +04:00
StaticData : : Instance ( ) . GetTranslationSystem ( TranslationSystem : : DEFAULT ) . GetFeatureFunctions ( ) ;
2012-02-01 23:47:43 +04:00
// read core weight file
ProducerWeightMap coreWeightMap ;
if ( ! coreWeightFile . empty ( ) ) {
if ( ! loadCoreWeights ( coreWeightFile , coreWeightMap , featureFunctions ) ) {
cerr < < " Error: Failed to load core weights from " < < coreWeightFile < < endl ;
return 1 ;
}
else
cerr < < " Loaded core weights from " < < coreWeightFile < < " . " < < endl ;
}
2011-11-16 13:13:17 +04:00
// set core weights
2011-05-31 19:39:48 +04:00
ScoreComponentCollection initialWeights = decoder - > getWeights ( ) ;
if ( coreWeightMap . size ( ) > 0 ) {
2012-02-01 23:47:43 +04:00
ProducerWeightMap : : iterator p ;
2011-06-01 21:26:41 +04:00
for ( p = coreWeightMap . begin ( ) ; p ! = coreWeightMap . end ( ) ; + + p )
{
initialWeights . Assign ( p - > first , p - > second ) ;
2011-05-31 19:39:48 +04:00
}
}
decoder - > setWeights ( initialWeights ) ;
2011-03-23 15:13:38 +03:00
//Main loop:
2011-03-28 22:11:45 +04:00
// print initial weights
2011-05-31 19:39:48 +04:00
cerr < < " Rank " < < rank < < " , initial weights: " < < initialWeights < < endl ;
2011-03-29 21:08:07 +04:00
ScoreComponentCollection cumulativeWeights ; // collect weights per epoch to produce an average
2011-04-09 01:04:08 +04:00
size_t numberOfUpdates = 0 ;
size_t numberOfUpdatesThisEpoch = 0 ;
2011-03-23 15:13:38 +03:00
2011-06-26 23:12:46 +04:00
time_t now ;
time ( & now ) ;
cerr < < " Rank " < < rank < < " , " < < ctime ( & now ) < < endl ;
2011-03-23 15:13:38 +03:00
2011-03-28 22:11:45 +04:00
ScoreComponentCollection mixedAverageWeights ;
ScoreComponentCollection mixedAverageWeightsPrevious ;
ScoreComponentCollection mixedAverageWeightsBeforePrevious ;
2011-03-23 15:13:38 +03:00
2012-01-14 19:56:16 +04:00
// for accumulating delayed updates
ScoreComponentCollection delayedWeightUpdates ;
2011-03-23 15:13:38 +03:00
bool stop = false ;
2011-10-25 18:17:00 +04:00
// int sumStillViolatedConstraints;
2011-03-23 15:13:38 +03:00
float * sendbuf , * recvbuf ;
sendbuf = ( float * ) malloc ( sizeof ( float ) ) ;
recvbuf = ( float * ) malloc ( sizeof ( float ) ) ;
for ( size_t epoch = 0 ; epoch < epochs & & ! stop ; + + epoch ) {
2011-06-29 22:06:28 +04:00
// sum of violated constraints in an epoch
2011-10-25 18:17:00 +04:00
// sumStillViolatedConstraints = 0;
2011-04-09 01:04:08 +04:00
numberOfUpdatesThisEpoch = 0 ;
2011-03-23 15:13:38 +03:00
// Sum up weights over one epoch, final average uses weights from last epoch
2012-01-23 16:22:12 +04:00
if ( ! accumulateWeights )
2011-03-23 15:13:38 +03:00
cumulativeWeights . ZeroAll ( ) ;
2012-01-23 16:22:12 +04:00
delayedWeightUpdates . ZeroAll ( ) ;
2011-03-23 15:13:38 +03:00
// number of weight dumps this epoch
size_t weightEpochDump = 0 ;
size_t shardPosition = 0 ;
vector < size_t > : : const_iterator sid = shard . begin ( ) ;
while ( sid ! = shard . end ( ) ) {
// feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues)
vector < vector < ScoreComponentCollection > > featureValues ;
vector < vector < float > > bleuScores ;
2012-02-20 21:27:35 +04:00
vector < vector < float > > modelScores ;
2011-03-23 15:13:38 +03:00
2011-06-29 20:44:28 +04:00
// variables for hope-fear/perceptron setting
2011-05-16 20:56:52 +04:00
vector < vector < ScoreComponentCollection > > featureValuesHope ;
vector < vector < ScoreComponentCollection > > featureValuesFear ;
vector < vector < float > > bleuScoresHope ;
vector < vector < float > > bleuScoresFear ;
2012-02-20 21:27:35 +04:00
vector < vector < float > > modelScoresHope ;
vector < vector < float > > modelScoresFear ;
2011-06-29 20:44:28 +04:00
vector < vector < ScoreComponentCollection > > dummyFeatureValues ;
vector < vector < float > > dummyBleuScores ;
2012-02-20 21:27:35 +04:00
vector < vector < float > > dummyModelScores ;
2011-05-16 20:56:52 +04:00
2011-03-23 15:13:38 +03:00
// get moses weights
ScoreComponentCollection mosesWeights = decoder - > getWeights ( ) ;
2011-06-26 23:12:46 +04:00
VERBOSE ( 1 , " \n Rank " < < rank < < " , epoch " < < epoch < < " , weights: " < < mosesWeights < < endl ) ;
2011-03-23 15:13:38 +03:00
// BATCHING: produce nbest lists for all input sentences in batch
vector < float > oracleBleuScores ;
2012-02-20 21:27:35 +04:00
vector < float > oracleModelScores ;
2011-03-23 15:13:38 +03:00
vector < vector < const Word * > > oracles ;
2011-04-26 23:35:06 +04:00
vector < vector < const Word * > > oneBests ;
2011-03-23 15:13:38 +03:00
vector < ScoreComponentCollection > oracleFeatureValues ;
vector < size_t > inputLengths ;
vector < size_t > ref_ids ;
size_t actualBatchSize = 0 ;
2011-04-04 17:03:27 +04:00
2011-04-11 16:22:19 +04:00
vector < size_t > : : const_iterator current_sid_start = sid ;
2011-10-25 18:17:00 +04:00
size_t examples_in_batch = 0 ;
2011-03-23 15:13:38 +03:00
for ( size_t batchPosition = 0 ; batchPosition < batchSize & & sid
! = shard . end ( ) ; + + batchPosition ) {
2011-04-04 17:03:27 +04:00
string & input = inputSentences [ * sid ] ;
2012-01-12 20:26:16 +04:00
// const vector<string>& refs = referenceSentences[*sid];
2011-06-26 23:12:46 +04:00
cerr < < " \n Rank " < < rank < < " , epoch " < < epoch < < " , input sentence " < < * sid < < " : \" " < < input < < " \" " < < " (batch pos " < < batchPosition < < " ) " < < endl ;
2011-03-23 15:13:38 +03:00
vector < ScoreComponentCollection > newFeatureValues ;
2012-02-20 21:27:35 +04:00
vector < float > newScores ;
2012-02-21 00:14:19 +04:00
if ( model_hope_fear | | rank_only | | hope_fear_rank ) {
2011-05-31 22:34:27 +04:00
featureValues . push_back ( newFeatureValues ) ;
2012-02-20 21:27:35 +04:00
bleuScores . push_back ( newScores ) ;
modelScores . push_back ( newScores ) ;
2011-05-31 22:34:27 +04:00
}
2012-02-21 00:14:19 +04:00
if ( hope_fear | | hope_fear_rank | | perceptron_update ) {
2011-05-16 20:56:52 +04:00
featureValuesHope . push_back ( newFeatureValues ) ;
featureValuesFear . push_back ( newFeatureValues ) ;
2012-02-20 21:27:35 +04:00
bleuScoresHope . push_back ( newScores ) ;
bleuScoresFear . push_back ( newScores ) ;
modelScoresHope . push_back ( newScores ) ;
modelScoresFear . push_back ( newScores ) ;
2012-02-01 23:47:43 +04:00
if ( historyOf1best ) {
2011-06-29 20:44:28 +04:00
dummyFeatureValues . push_back ( newFeatureValues ) ;
2012-02-20 21:27:35 +04:00
dummyBleuScores . push_back ( newScores ) ;
dummyModelScores . push_back ( newScores ) ;
2011-06-29 20:44:28 +04:00
}
2011-05-16 20:56:52 +04:00
}
2011-05-01 18:17:40 +04:00
2012-01-16 15:22:58 +04:00
size_t ref_length ;
float avg_ref_length ;
2012-02-21 00:14:19 +04:00
if ( hope_fear | | hope_fear_rank | | perceptron_update ) {
2011-05-16 20:56:52 +04:00
// HOPE
2011-10-23 01:23:58 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , " < < hope_n < < " best hope translations " < < endl ;
2011-10-20 18:02:21 +04:00
vector < const Word * > oracle = decoder - > getNBest ( input , * sid , hope_n , 1.0 , bleuScoreWeight_hope ,
2012-02-20 21:27:35 +04:00
featureValuesHope [ batchPosition ] , bleuScoresHope [ batchPosition ] , modelScoresHope [ batchPosition ] ,
true , distinctNbest , rank , epoch ) ;
2011-10-25 18:17:00 +04:00
size_t current_input_length = decoder - > getCurrentInputLength ( ) ;
2011-05-16 20:56:52 +04:00
decoder - > cleanup ( ) ;
2012-01-16 15:22:58 +04:00
ref_length = decoder - > getClosestReferenceLength ( * sid , oracle . size ( ) ) ;
avg_ref_length = ref_length ;
float hope_length_ratio = ( float ) oracle . size ( ) / ref_length ;
2012-02-04 00:20:25 +04:00
int oracleSize = ( int ) oracle . size ( ) ;
2011-10-23 01:23:58 +04:00
cerr < < " , l-ratio hope: " < < hope_length_ratio < < endl ;
2012-01-23 19:15:27 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , current input length: " < < current_input_length < < endl ;
2011-05-16 20:56:52 +04:00
2012-02-04 00:20:25 +04:00
bool skip = false ;
// Length-related example selection
float length_diff_hope = abs ( 1 - hope_length_ratio ) ;
if ( max_length_dev_hope_ref ! = - 1 & & length_diff_hope > max_length_dev_hope_ref )
skip = true ;
2011-10-25 18:17:00 +04:00
vector < const Word * > bestModel ;
2012-02-04 00:20:25 +04:00
if ( historyOf1best & & ! skip ) {
2011-10-20 18:03:32 +04:00
// MODEL (for updating the history only, using dummy vectors)
2012-01-12 20:26:16 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , 1best wrt model score (for history or length stabilisation) " < < endl ;
2011-10-25 18:17:00 +04:00
bestModel = decoder - > getNBest ( input , * sid , 1 , 0.0 , bleuScoreWeight ,
2012-02-20 21:27:35 +04:00
dummyFeatureValues [ batchPosition ] , dummyBleuScores [ batchPosition ] , dummyModelScores [ batchPosition ] ,
true , distinctNbest , rank , epoch ) ;
2011-10-20 18:03:32 +04:00
decoder - > cleanup ( ) ;
2012-01-12 20:26:16 +04:00
cerr < < endl ;
2012-01-16 15:22:58 +04:00
ref_length = decoder - > getClosestReferenceLength ( * sid , bestModel . size ( ) ) ;
2011-10-20 18:03:32 +04:00
}
2011-05-16 20:56:52 +04:00
// FEAR
2012-02-04 00:20:25 +04:00
float fear_length_ratio = 0 ;
2012-02-14 18:31:26 +04:00
float bleuRatioHopeFear = 0 ;
2012-02-04 00:20:25 +04:00
int fearSize = 0 ;
if ( ! skip ) {
2012-02-14 18:31:26 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , " < < fear_n < < " best fear translations " < < endl ;
vector < const Word * > fear = decoder - > getNBest ( input , * sid , fear_n , - 1.0 , bleuScoreWeight_fear ,
2012-02-20 21:27:35 +04:00
featureValuesFear [ batchPosition ] , bleuScoresFear [ batchPosition ] , modelScoresFear [ batchPosition ] ,
true , distinctNbest , rank , epoch ) ;
2012-02-14 18:31:26 +04:00
decoder - > cleanup ( ) ;
ref_length = decoder - > getClosestReferenceLength ( * sid , fear . size ( ) ) ;
avg_ref_length + = ref_length ;
avg_ref_length / = 2 ;
fear_length_ratio = ( float ) fear . size ( ) / ref_length ;
fearSize = ( int ) fear . size ( ) ;
cerr < < " , l-ratio fear: " < < fear_length_ratio < < endl ;
2012-02-14 22:57:47 +04:00
for ( size_t i = 0 ; i < fear . size ( ) ; + + i )
2012-02-14 18:31:26 +04:00
delete fear [ i ] ;
2011-10-23 01:23:58 +04:00
2012-02-14 18:31:26 +04:00
// Bleu-related example selection
bleuRatioHopeFear = bleuScoresHope [ batchPosition ] [ 0 ] / bleuScoresFear [ batchPosition ] [ 0 ] ;
if ( minBleuRatio ! = - 1 & & bleuRatioHopeFear < minBleuRatio )
skip = true ;
if ( maxBleuRatio ! = - 1 & & bleuRatioHopeFear > maxBleuRatio )
skip = true ;
// Length-related example selection
float length_diff_fear = abs ( 1 - fear_length_ratio ) ;
size_t length_diff_hope_fear = abs ( oracleSize - fearSize ) ;
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , abs-length hope-fear: " < < length_diff_hope_fear < < " , BLEU hope-fear: " < < bleuScoresHope [ batchPosition ] [ 0 ] - bleuScoresFear [ batchPosition ] [ 0 ] < < endl ;
if ( max_length_dev_hypos ! = - 1 & & ( length_diff_hope_fear > avg_ref_length * max_length_dev_hypos ) )
skip = true ;
if ( max_length_dev_fear_ref ! = - 1 & & length_diff_fear > max_length_dev_fear_ref )
skip = true ;
}
2012-02-04 00:20:25 +04:00
2011-10-27 19:37:10 +04:00
if ( skip ) {
2012-02-14 18:31:26 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , skip example ( " < < hope_length_ratio < < " , " < < bleuRatioHopeFear < < " ).. " < < endl ;
2011-10-25 18:17:00 +04:00
featureValuesHope [ batchPosition ] . clear ( ) ;
featureValuesFear [ batchPosition ] . clear ( ) ;
bleuScoresHope [ batchPosition ] . clear ( ) ;
bleuScoresFear [ batchPosition ] . clear ( ) ;
if ( historyOf1best ) {
dummyFeatureValues [ batchPosition ] . clear ( ) ;
dummyBleuScores [ batchPosition ] . clear ( ) ;
}
}
else {
// needed for history
inputLengths . push_back ( current_input_length ) ;
ref_ids . push_back ( * sid ) ;
if ( ! sentenceLevelBleu ) {
oracles . push_back ( oracle ) ;
oneBests . push_back ( bestModel ) ;
}
examples_in_batch + + ;
}
2011-05-16 20:56:52 +04:00
}
2012-02-21 00:14:19 +04:00
if ( rank_only | | hope_fear_rank ) {
// MODEL
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , " < < rank_n < < " best wrt model score " < < endl ;
vector < const Word * > bestModel = decoder - > getNBest ( input , * sid , rank_n , 0.0 , bleuScoreWeight ,
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , modelScores [ batchPosition ] ,
true , distinctNbest , rank , epoch ) ;
decoder - > cleanup ( ) ;
oneBests . push_back ( bestModel ) ;
ref_length = decoder - > getClosestReferenceLength ( * sid , bestModel . size ( ) ) ;
float model_length_ratio = ( float ) bestModel . size ( ) / ref_length ;
cerr < < " , l-ratio model: " < < model_length_ratio < < endl ;
examples_in_batch + + ;
}
if ( model_hope_fear ) {
2011-06-29 20:44:28 +04:00
// HOPE
2011-10-23 01:23:58 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , " < < n < < " best hope translations " < < endl ;
2011-06-29 20:44:28 +04:00
size_t oraclePos = featureValues [ batchPosition ] . size ( ) ;
2011-10-20 18:02:21 +04:00
vector < const Word * > oracle = decoder - > getNBest ( input , * sid , n , 1.0 , bleuScoreWeight_hope ,
2012-02-20 21:27:35 +04:00
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , modelScores [ batchPosition ] ,
true , distinctNbest , rank , epoch ) ;
2011-06-29 20:44:28 +04:00
// needed for history
inputLengths . push_back ( decoder - > getCurrentInputLength ( ) ) ;
ref_ids . push_back ( * sid ) ;
decoder - > cleanup ( ) ;
oracles . push_back ( oracle ) ;
2012-01-16 15:22:58 +04:00
ref_length = decoder - > getClosestReferenceLength ( * sid , oracle . size ( ) ) ;
float hope_length_ratio = ( float ) oracle . size ( ) / ref_length ;
2011-10-23 01:23:58 +04:00
cerr < < " , l-ratio hope: " < < hope_length_ratio < < endl ;
2011-06-29 20:44:28 +04:00
oracleFeatureValues . push_back ( featureValues [ batchPosition ] [ oraclePos ] ) ;
oracleBleuScores . push_back ( bleuScores [ batchPosition ] [ oraclePos ] ) ;
2012-02-20 21:27:35 +04:00
oracleModelScores . push_back ( modelScores [ batchPosition ] [ oraclePos ] ) ;
2011-06-29 20:44:28 +04:00
2011-10-20 18:03:32 +04:00
// MODEL
2011-10-23 01:23:58 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , " < < n < < " best wrt model score " < < endl ;
2011-10-20 18:03:32 +04:00
vector < const Word * > bestModel = decoder - > getNBest ( input , * sid , n , 0.0 , bleuScoreWeight ,
2012-02-20 21:27:35 +04:00
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , modelScores [ batchPosition ] ,
true , distinctNbest , rank , epoch ) ;
2011-10-20 18:03:32 +04:00
decoder - > cleanup ( ) ;
oneBests . push_back ( bestModel ) ;
2012-01-16 15:22:58 +04:00
ref_length = decoder - > getClosestReferenceLength ( * sid , bestModel . size ( ) ) ;
float model_length_ratio = ( float ) bestModel . size ( ) / ref_length ;
2011-10-23 01:23:58 +04:00
cerr < < " , l-ratio model: " < < model_length_ratio < < endl ;
2011-10-20 18:03:32 +04:00
2011-06-29 20:44:28 +04:00
// FEAR
2011-10-23 01:23:58 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , " < < n < < " best fear translations " < < endl ;
2011-06-29 20:44:28 +04:00
size_t fearPos = featureValues [ batchPosition ] . size ( ) ;
2012-02-04 00:20:25 +04:00
vector < const Word * > fear = decoder - > getNBest ( input , * sid , n , - 1.0 , bleuScoreWeight_fear ,
2012-02-20 21:27:35 +04:00
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , modelScores [ batchPosition ] ,
true , distinctNbest , rank , epoch ) ;
2011-06-29 20:44:28 +04:00
decoder - > cleanup ( ) ;
2012-01-16 15:22:58 +04:00
ref_length = decoder - > getClosestReferenceLength ( * sid , fear . size ( ) ) ;
float fear_length_ratio = ( float ) fear . size ( ) / ref_length ;
2011-10-23 01:23:58 +04:00
cerr < < " , l-ratio fear: " < < fear_length_ratio < < endl ;
2011-06-29 20:44:28 +04:00
for ( size_t i = 0 ; i < fear . size ( ) ; + + i ) {
delete fear [ i ] ;
2011-05-09 13:39:57 +04:00
}
2011-10-25 18:17:00 +04:00
examples_in_batch + + ;
2011-03-22 20:17:43 +03:00
}
2011-02-24 13:54:16 +03:00
2011-03-23 15:13:38 +03:00
// next input sentence
+ + sid ;
+ + actualBatchSize ;
+ + shardPosition ;
} // end of batch loop
2011-10-25 18:17:00 +04:00
if ( examples_in_batch = = 0 ) {
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , batch is empty. " < < endl ;
}
else {
vector < vector < float > > losses ( actualBatchSize ) ;
if ( model_hope_fear ) {
// Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis)
for ( size_t batchPosition = 0 ; batchPosition < actualBatchSize ; + + batchPosition ) {
for ( size_t j = 0 ; j < bleuScores [ batchPosition ] . size ( ) ; + + j ) {
losses [ batchPosition ] . push_back ( oracleBleuScores [ batchPosition ] - bleuScores [ batchPosition ] [ j ] ) ;
}
2011-05-16 20:56:52 +04:00
}
2011-03-23 15:13:38 +03:00
}
2011-11-16 13:13:17 +04:00
// set weight for bleu feature to 0 before optimizing
2011-11-17 20:31:46 +04:00
vector < const ScoreProducer * > : : const_iterator iter = featureFunctions . begin ( ) ;
for ( ; iter ! = featureFunctions . end ( ) ; + + iter )
if ( ( * iter ) - > GetScoreProducerWeightShortName ( ) = = " bl " ) {
mosesWeights . Assign ( * iter , 0 ) ;
break ;
}
2011-03-23 15:13:38 +03:00
2011-10-25 18:17:00 +04:00
// take logs of feature values
if ( logFeatureValues ) {
takeLogs ( featureValuesHope , baseOfLog ) ;
takeLogs ( featureValuesFear , baseOfLog ) ;
takeLogs ( featureValues , baseOfLog ) ;
for ( size_t i = 0 ; i < oracleFeatureValues . size ( ) ; + + i ) {
2011-11-25 17:56:55 +04:00
oracleFeatureValues [ i ] . LogCoreFeatures ( baseOfLog ) ;
2011-10-25 18:17:00 +04:00
}
2011-03-23 15:13:38 +03:00
}
2011-10-25 18:17:00 +04:00
// print out the feature values
if ( print_feature_values ) {
cerr < < " \n Rank " < < rank < < " , epoch " < < epoch < < " , feature values: " < < endl ;
2012-02-21 00:14:19 +04:00
if ( model_hope_fear | | rank_only ) printFeatureValues ( featureValues ) ;
2011-10-25 18:17:00 +04:00
else {
cerr < < " hope: " < < endl ;
printFeatureValues ( featureValuesHope ) ;
cerr < < " fear: " < < endl ;
printFeatureValues ( featureValuesFear ) ;
}
2011-05-16 20:56:52 +04:00
}
2011-04-04 17:03:27 +04:00
2011-10-25 18:17:00 +04:00
// set core features to 0 to avoid updating the feature weights
if ( coreWeightMap . size ( ) > 0 ) {
ignoreCoreFeatures ( featureValues , coreWeightMap ) ;
ignoreCoreFeatures ( featureValuesHope , coreWeightMap ) ;
ignoreCoreFeatures ( featureValuesFear , coreWeightMap ) ;
}
2011-03-23 15:13:38 +03:00
2011-10-25 18:17:00 +04:00
// Run optimiser on batch:
VERBOSE ( 1 , " \n Rank " < < rank < < " , epoch " < < epoch < < " , run optimiser: " < < endl ) ;
size_t update_status ;
2012-01-14 19:56:16 +04:00
ScoreComponentCollection weightUpdate ;
2011-10-25 18:17:00 +04:00
if ( perceptron_update ) {
vector < vector < float > > dummy1 ;
2012-01-14 19:56:16 +04:00
update_status = optimiser - > updateWeightsHopeFear ( mosesWeights , weightUpdate ,
2012-02-20 21:27:35 +04:00
featureValuesHope , featureValuesFear , dummy1 , dummy1 , dummy1 , dummy1 , learning_rate , rank , epoch ) ;
2011-10-25 18:17:00 +04:00
}
else if ( hope_fear ) {
2012-01-23 16:22:12 +04:00
if ( bleuScoresHope [ 0 ] [ 0 ] > = min_oracle_bleu )
if ( hope_n = = 1 & & fear_n = = 1 )
update_status = ( ( MiraOptimiser * ) optimiser ) - > updateWeightsAnalytically ( mosesWeights , weightUpdate ,
featureValuesHope [ 0 ] [ 0 ] , featureValuesFear [ 0 ] [ 0 ] , bleuScoresHope [ 0 ] [ 0 ] , bleuScoresFear [ 0 ] [ 0 ] ,
2012-02-20 21:27:35 +04:00
modelScoresHope [ 0 ] [ 0 ] , modelScoresFear [ 0 ] [ 0 ] , learning_rate , rank , epoch ) ;
2012-01-23 16:22:12 +04:00
else
update_status = optimiser - > updateWeightsHopeFear ( mosesWeights , weightUpdate ,
2012-02-20 21:27:35 +04:00
featureValuesHope , featureValuesFear , bleuScoresHope , bleuScoresFear ,
modelScoresHope , modelScoresFear , learning_rate , rank , epoch ) ;
2012-01-16 20:11:02 +04:00
else
update_status = - 1 ;
2011-10-25 18:17:00 +04:00
}
2012-02-21 00:14:19 +04:00
else if ( rank_only ) {
// learning ranking of model translations
update_status = ( ( MiraOptimiser * ) optimiser ) - > updateWeightsRankModel ( mosesWeights , weightUpdate ,
featureValues , bleuScores , modelScores , learning_rate , rank , epoch ) ;
}
else if ( hope_fear_rank ) {
// hope-fear + learning ranking of model translations
update_status = ( ( MiraOptimiser * ) optimiser ) - > updateWeightsHopeFearAndRankModel ( mosesWeights , weightUpdate ,
featureValuesHope , featureValuesFear , featureValues , bleuScoresHope , bleuScoresFear , bleuScores ,
modelScoresHope , modelScoresFear , modelScores , learning_rate , rank , epoch ) ;
}
2011-10-25 18:17:00 +04:00
else {
// model_hope_fear
2012-01-14 19:56:16 +04:00
update_status = ( ( MiraOptimiser * ) optimiser ) - > updateWeights ( mosesWeights , weightUpdate ,
2012-02-20 21:27:35 +04:00
featureValues , losses , bleuScores , modelScores , oracleFeatureValues , oracleBleuScores , oracleModelScores , learning_rate , rank , epoch ) ;
2011-05-31 22:34:27 +04:00
}
2011-04-08 14:59:41 +04:00
2011-10-25 18:17:00 +04:00
// sumStillViolatedConstraints += update_status;
2011-05-31 22:34:27 +04:00
2011-10-25 18:17:00 +04:00
if ( update_status = = 0 ) { // if weights were updated
2012-01-14 19:56:16 +04:00
// apply weight update
2012-01-23 16:22:12 +04:00
if ( delayUpdates ) {
delayedWeightUpdates . PlusEquals ( weightUpdate ) ;
cerr < < " \n Rank " < < rank < < " , epoch " < < epoch < < " , keeping update: " < < weightUpdate < < endl ;
+ + numberOfUpdatesThisEpoch ;
2011-10-25 18:17:00 +04:00
}
2012-01-23 16:22:12 +04:00
else {
mosesWeights . PlusEquals ( weightUpdate ) ;
if ( normaliseWeights )
mosesWeights . L1Normalise ( ) ;
cumulativeWeights . PlusEquals ( mosesWeights ) ;
+ + numberOfUpdates ;
+ + numberOfUpdatesThisEpoch ;
if ( averageWeights ) {
ScoreComponentCollection averageWeights ( cumulativeWeights ) ;
if ( accumulateWeights ) {
averageWeights . DivideEquals ( numberOfUpdates ) ;
} else {
averageWeights . DivideEquals ( numberOfUpdatesThisEpoch ) ;
}
mosesWeights = averageWeights ;
2011-10-25 18:17:00 +04:00
}
2012-01-23 16:22:12 +04:00
if ( ! delayUpdates )
// set new Moses weights
decoder - > setWeights ( mosesWeights ) ;
2011-06-26 23:12:46 +04:00
}
2011-05-31 19:39:48 +04:00
}
2011-10-25 18:17:00 +04:00
// update history (for approximate document Bleu)
2011-04-26 23:35:06 +04:00
if ( historyOf1best ) {
for ( size_t i = 0 ; i < oneBests . size ( ) ; + + i ) {
2011-05-11 19:12:05 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , update history with 1best length: " < < oneBests [ i ] . size ( ) < < " " ;
2011-10-25 18:17:00 +04:00
}
2011-04-26 23:35:06 +04:00
decoder - > updateHistory ( oneBests , inputLengths , ref_ids , rank , epoch ) ;
}
2011-10-25 18:17:00 +04:00
else if ( historyOfOracles ) {
2011-04-26 23:35:06 +04:00
for ( size_t i = 0 ; i < oracles . size ( ) ; + + i ) {
2011-05-11 19:12:05 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , update history with oracle length: " < < oracles [ i ] . size ( ) < < " " ;
2011-04-26 23:35:06 +04:00
}
decoder - > updateHistory ( oracles , inputLengths , ref_ids , rank , epoch ) ;
}
2011-10-25 18:17:00 +04:00
deleteTranslations ( oracles ) ;
deleteTranslations ( oneBests ) ;
2011-10-26 14:16:45 +04:00
} // END TRANSLATE AND UPDATE OF BATCH
2011-03-23 15:13:38 +03:00
2011-04-13 17:11:57 +04:00
size_t mixing_base = mixingFrequency = = 0 ? 0 : shard . size ( ) / mixingFrequency ;
size_t dumping_base = weightDumpFrequency = = 0 ? 0 : shard . size ( ) / weightDumpFrequency ;
2011-03-28 22:11:45 +04:00
// mix weights?
2011-04-10 23:48:57 +04:00
if ( evaluateModulo ( shardPosition , mixing_base , actualBatchSize ) ) {
2011-03-07 17:12:36 +03:00
# ifdef MPI_ENABLE
2011-03-29 21:08:07 +04:00
ScoreComponentCollection mixedWeights ;
2011-03-23 20:25:33 +03:00
cerr < < " \n Rank " < < rank < < " , before mixing: " < < mosesWeights < < endl ;
2011-03-28 22:11:45 +04:00
// collect all weights in mixedWeights and divide by number of processes
mpi : : reduce ( world , mosesWeights , mixedWeights , SCCPlus ( ) , 0 ) ;
2011-03-23 20:25:33 +03:00
if ( rank = = 0 ) {
// divide by number of processes
2011-03-28 22:11:45 +04:00
mixedWeights . DivideEquals ( size ) ;
2011-03-23 20:25:33 +03:00
// normalise weights after averaging
if ( normaliseWeights ) {
2011-03-28 22:11:45 +04:00
mixedWeights . L1Normalise ( ) ;
cerr < < " Mixed weights (normalised): " < < mixedWeights < < endl ;
2011-03-23 15:13:38 +03:00
}
2011-03-23 20:25:33 +03:00
else {
2011-03-28 22:11:45 +04:00
cerr < < " Mixed weights: " < < mixedWeights < < endl ;
2011-03-23 20:25:33 +03:00
}
}
2011-03-23 15:13:38 +03:00
2011-03-23 20:25:33 +03:00
// broadcast average weights from process 0
2011-03-28 22:11:45 +04:00
mpi : : broadcast ( world , mixedWeights , 0 ) ;
decoder - > setWeights ( mixedWeights ) ;
2011-03-29 21:08:07 +04:00
mosesWeights = mixedWeights ;
2010-11-24 20:06:54 +03:00
# endif
# ifndef MPI_ENABLE
2011-03-29 21:08:07 +04:00
cerr < < " \n Rank " < < rank < < " , no mixing, weights: " < < mosesWeights < < endl ;
2010-12-06 18:28:51 +03:00
# endif
2011-03-23 20:25:33 +03:00
} // end mixing
2011-03-23 15:13:38 +03:00
2011-03-28 22:11:45 +04:00
// Dump weights?
2012-01-23 16:22:12 +04:00
if ( ! delayUpdates & & evaluateModulo ( shardPosition , dumping_base , actualBatchSize ) ) {
2011-11-17 20:31:46 +04:00
ScoreComponentCollection tmpAverageWeights ( cumulativeWeights ) ;
bool proceed = false ;
if ( accumulateWeights ) {
if ( numberOfUpdates > 0 ) {
tmpAverageWeights . DivideEquals ( numberOfUpdates ) ;
proceed = true ;
}
} else {
if ( numberOfUpdatesThisEpoch > 0 ) {
tmpAverageWeights . DivideEquals ( numberOfUpdatesThisEpoch ) ;
proceed = true ;
}
}
if ( proceed ) {
2011-03-08 19:58:02 +03:00
# ifdef MPI_ENABLE
2011-11-17 20:31:46 +04:00
// average across processes
mpi : : reduce ( world , tmpAverageWeights , mixedAverageWeights , SCCPlus ( ) , 0 ) ;
2011-03-08 19:58:02 +03:00
# endif
# ifndef MPI_ENABLE
2011-11-17 20:31:46 +04:00
mixedAverageWeights = tmpAverageWeights ;
2011-03-08 19:58:02 +03:00
# endif
2011-11-17 20:31:46 +04:00
if ( rank = = 0 & & ! weightDumpStem . empty ( ) ) {
// divide by number of processes
mixedAverageWeights . DivideEquals ( size ) ;
// normalise weights after averaging
if ( normaliseWeights ) {
2012-01-23 16:22:12 +04:00
mixedAverageWeights . L1Normalise ( ) ;
2011-11-17 20:31:46 +04:00
}
// dump final average weights
ostringstream filename ;
if ( epoch < 10 ) {
2012-01-23 16:22:12 +04:00
filename < < weightDumpStem < < " _0 " < < epoch ;
2011-11-17 20:31:46 +04:00
} else {
2012-01-23 16:22:12 +04:00
filename < < weightDumpStem < < " _ " < < epoch ;
2011-11-17 20:31:46 +04:00
}
if ( weightDumpFrequency > 1 ) {
2012-01-23 16:22:12 +04:00
filename < < " _ " < < weightEpochDump ;
2011-11-17 20:31:46 +04:00
}
if ( accumulateWeights ) {
2012-01-23 16:22:12 +04:00
cerr < < " \n Mixed average weights (cumulative) during epoch " < < epoch < < " : " < < mixedAverageWeights < < endl ;
2011-11-17 20:31:46 +04:00
} else {
2012-01-23 16:22:12 +04:00
cerr < < " \n Mixed average weights during epoch " < < epoch < < " : " < < mixedAverageWeights < < endl ;
2011-11-17 20:31:46 +04:00
}
cerr < < " Dumping mixed average weights during epoch " < < epoch < < " to " < < filename . str ( ) < < endl < < endl ;
mixedAverageWeights . Save ( filename . str ( ) ) ;
+ + weightEpochDump ;
}
}
2011-04-10 23:48:57 +04:00
} // end dumping
2012-01-14 19:56:16 +04:00
2011-03-23 20:25:33 +03:00
} // end of shard loop, end of this epoch
2011-03-23 15:13:38 +03:00
2012-01-14 19:56:16 +04:00
if ( delayUpdates ) {
// apply all updates from this epoch to the weight vector
ScoreComponentCollection mosesWeights = decoder - > getWeights ( ) ;
2012-01-23 16:22:12 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , delayed update, old moses weights: " < < mosesWeights < < endl ;
2012-01-14 19:56:16 +04:00
mosesWeights . PlusEquals ( delayedWeightUpdates ) ;
2012-01-23 16:22:12 +04:00
cumulativeWeights . PlusEquals ( mosesWeights ) ;
2012-01-14 19:56:16 +04:00
decoder - > setWeights ( mosesWeights ) ;
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , delayed update, new moses weights: " < < mosesWeights < < endl ;
2012-01-23 16:22:12 +04:00
ScoreComponentCollection tmpAverageWeights ( cumulativeWeights ) ;
bool proceed = false ;
if ( accumulateWeights ) {
if ( numberOfUpdatesThisEpoch > 0 ) {
tmpAverageWeights . DivideEquals ( epoch + 1 ) ;
proceed = true ;
}
}
else {
if ( numberOfUpdatesThisEpoch > 0 )
proceed = true ;
}
if ( proceed ) {
# ifdef MPI_ENABLE
// average across processes
mpi : : reduce ( world , tmpAverageWeights , mixedAverageWeights , SCCPlus ( ) , 0 ) ;
# endif
# ifndef MPI_ENABLE
mixedAverageWeights = tmpAverageWeights ;
# endif
if ( rank = = 0 & & ! weightDumpStem . empty ( ) ) {
// divide by number of processes
mixedAverageWeights . DivideEquals ( size ) ;
// normalise weights after averaging
if ( normaliseWeights ) {
mixedAverageWeights . L1Normalise ( ) ;
}
// dump final average weights
ostringstream filename ;
if ( epoch < 10 ) {
filename < < weightDumpStem < < " _0 " < < epoch ;
} else {
filename < < weightDumpStem < < " _ " < < epoch ;
}
if ( weightDumpFrequency > 1 ) {
filename < < " _ " < < weightEpochDump ;
}
if ( accumulateWeights ) {
cerr < < " \n Mixed average weights (cumulative) during epoch " < < epoch < < " : " < < mixedAverageWeights < < endl ;
} else {
cerr < < " \n Mixed average weights during epoch " < < epoch < < " : " < < mixedAverageWeights < < endl ;
}
cerr < < " Dumping mixed average weights during epoch " < < epoch < < " to " < < filename . str ( ) < < endl < < endl ;
mixedAverageWeights . Save ( filename . str ( ) ) ;
+ + weightEpochDump ;
}
}
2012-01-14 19:56:16 +04:00
}
2011-06-26 23:12:46 +04:00
if ( verbosity > 0 ) {
cerr < < " Bleu feature history after epoch " < < epoch < < endl ;
decoder - > printBleuFeatureHistory ( cerr ) ;
}
2011-10-25 18:17:00 +04:00
// cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl;
2011-04-26 23:35:06 +04:00
2011-05-09 13:39:57 +04:00
// Check whether there were any weight updates during this epoch
2011-04-09 01:04:08 +04:00
size_t sumUpdates ;
2011-04-10 23:05:36 +04:00
size_t * sendbuf_uint , * recvbuf_uint ;
sendbuf_uint = ( size_t * ) malloc ( sizeof ( size_t ) ) ;
recvbuf_uint = ( size_t * ) malloc ( sizeof ( size_t ) ) ;
2011-04-09 01:04:08 +04:00
# ifdef MPI_ENABLE
2011-04-10 20:50:28 +04:00
//mpi::reduce(world, numberOfUpdatesThisEpoch, sumUpdates, MPI_SUM, 0);
2011-04-10 23:05:36 +04:00
sendbuf_uint [ 0 ] = numberOfUpdatesThisEpoch ;
recvbuf_uint [ 0 ] = 0 ;
MPI_Reduce ( sendbuf_uint , recvbuf_uint , 1 , MPI_UNSIGNED , MPI_SUM , 0 , world ) ;
sumUpdates = recvbuf_uint [ 0 ] ;
2011-04-09 01:04:08 +04:00
# endif
# ifndef MPI_ENABLE
sumUpdates = numberOfUpdatesThisEpoch ;
# endif
2011-04-10 23:05:36 +04:00
if ( rank = = 0 & & sumUpdates = = 0 ) {
cerr < < " \n No weight updates during this epoch.. stopping. " < < endl ;
stop = true ;
# ifdef MPI_ENABLE
mpi : : broadcast ( world , stop , 0 ) ;
# endif
2011-04-08 14:59:41 +04:00
}
2011-04-09 01:04:08 +04:00
2011-04-10 23:05:36 +04:00
if ( ! stop ) {
2011-04-08 14:59:41 +04:00
// Test if weights have converged
if ( weightConvergence ) {
bool reached = true ;
if ( rank = = 0 & & ( epoch > = 2 ) ) {
ScoreComponentCollection firstDiff ( mixedAverageWeights ) ;
firstDiff . MinusEquals ( mixedAverageWeightsPrevious ) ;
2011-12-23 14:55:54 +04:00
VERBOSE ( 1 , " Average weight changes since previous epoch: " < < firstDiff < <
" (max: " < < firstDiff . GetLInfNorm ( ) < < " ) " < < endl ) ;
2011-04-08 14:59:41 +04:00
ScoreComponentCollection secondDiff ( mixedAverageWeights ) ;
secondDiff . MinusEquals ( mixedAverageWeightsBeforePrevious ) ;
2011-12-23 14:55:54 +04:00
VERBOSE ( 1 , " Average weight changes since before previous epoch: " < < secondDiff < <
" (max: " < < secondDiff . GetLInfNorm ( ) < < " ) " < < endl < < endl ) ;
2011-04-08 14:59:41 +04:00
// check whether stopping criterion has been reached
// (both difference vectors must have all weight changes smaller than min_weight_change)
2011-12-23 14:55:54 +04:00
if ( firstDiff . GetLInfNorm ( ) > = min_weight_change )
reached = false ;
if ( secondDiff . GetLInfNorm ( ) > = min_weight_change )
reached = false ;
2011-04-08 14:59:41 +04:00
if ( reached ) {
// stop MIRA
stop = true ;
2011-06-26 23:12:46 +04:00
cerr < < " \n Weights have converged after epoch " < < epoch < < " .. stopping MIRA. " < < endl ;
2011-04-08 14:59:41 +04:00
ScoreComponentCollection dummy ;
ostringstream endfilename ;
endfilename < < " stopping " ;
dummy . Save ( endfilename . str ( ) ) ;
}
2011-03-23 15:13:38 +03:00
}
2011-03-28 22:11:45 +04:00
2011-04-08 14:59:41 +04:00
mixedAverageWeightsBeforePrevious = mixedAverageWeightsPrevious ;
mixedAverageWeightsPrevious = mixedAverageWeights ;
2011-03-04 15:24:10 +03:00
# ifdef MPI_ENABLE
2011-04-08 14:59:41 +04:00
mpi : : broadcast ( world , stop , 0 ) ;
2011-03-04 15:24:10 +03:00
# endif
2011-04-08 14:59:41 +04:00
} //end if (weightConvergence)
2011-06-27 21:15:03 +04:00
// adjust flexible parameters
if ( ! stop & & epoch > = adapt_after_epoch ) {
// if using flexible slack, decrease slack parameter for next epoch
if ( slack_step > 0 ) {
if ( slack - slack_step > = slack_min ) {
if ( typeid ( * optimiser ) = = typeid ( MiraOptimiser ) ) {
slack - = slack_step ;
VERBOSE ( 1 , " Change slack to: " < < slack < < endl ) ;
( ( MiraOptimiser * ) optimiser ) - > setSlack ( slack ) ;
}
2011-04-08 14:59:41 +04:00
}
2011-03-23 15:13:38 +03:00
}
2011-06-27 21:15:03 +04:00
// if using flexible margin slack, decrease margin slack parameter for next epoch
if ( margin_slack_incr > 0.0001 ) {
if ( typeid ( * optimiser ) = = typeid ( MiraOptimiser ) ) {
margin_slack + = margin_slack_incr ;
VERBOSE ( 1 , " Change margin slack to: " < < margin_slack < < endl ) ;
( ( MiraOptimiser * ) optimiser ) - > setMarginSlack ( margin_slack ) ;
}
2011-06-26 23:12:46 +04:00
}
2011-06-27 21:15:03 +04:00
// change learning rate
if ( ( decrease_learning_rate > 0 ) & & ( learning_rate - decrease_learning_rate > = min_learning_rate ) ) {
learning_rate - = decrease_learning_rate ;
if ( learning_rate < = 0.0001 ) {
learning_rate = 0 ;
stop = true ;
2011-03-18 18:49:48 +03:00
# ifdef MPI_ENABLE
2011-06-27 21:15:03 +04:00
mpi : : broadcast ( world , stop , 0 ) ;
2011-03-18 18:49:48 +03:00
# endif
2011-06-27 21:15:03 +04:00
}
VERBOSE ( 1 , " Change learning rate to " < < learning_rate < < endl ) ;
2011-04-08 14:59:41 +04:00
}
2011-03-23 15:13:38 +03:00
}
}
} // end of epoch loop
2011-03-07 17:12:36 +03:00
2010-12-01 21:09:49 +03:00
# ifdef MPI_ENABLE
2011-03-23 15:13:38 +03:00
MPI_Finalize ( ) ;
2010-12-06 18:28:51 +03:00
# endif
2011-06-26 23:12:46 +04:00
time ( & now ) ;
cerr < < " Rank " < < rank < < " , " < < ctime ( & now ) ;
2010-09-15 18:36:07 +04:00
2011-03-23 15:13:38 +03:00
delete decoder ;
exit ( 0 ) ;
2010-09-15 18:36:07 +04:00
}
2010-09-16 20:23:52 +04:00
2011-06-29 20:18:55 +04:00
// Append every line of 'filename' to 'sentences'.
// Returns false if the file cannot be opened, true otherwise
// (an empty file is a successful load of zero sentences).
bool loadSentences(const string &filename, vector<string> &sentences) {
  ifstream input(filename.c_str());
  if (!input)
    return false;
  for (string line; getline(input, line); )
    sentences.push_back(line);
  return true;
}
2012-02-01 23:47:43 +04:00
bool loadCoreWeights ( const string & filename , ProducerWeightMap & coreWeightMap , const vector < const ScoreProducer * > & featureFunctions ) {
2011-06-29 20:18:55 +04:00
ifstream in ( filename . c_str ( ) ) ;
if ( ! in )
return false ;
string line ;
2012-02-01 23:47:43 +04:00
vector < float > store_weights ;
cerr < < " Loading core weights: " < < endl ;
2011-06-29 20:18:55 +04:00
while ( getline ( in , line ) ) {
// split weight name from value
vector < string > split_line ;
boost : : split ( split_line , line , boost : : is_any_of ( " " ) ) ;
float weight ;
if ( ! from_string < float > ( weight , split_line [ 1 ] , std : : dec ) )
{
2012-02-01 23:47:43 +04:00
cerr < < " reading in float failed.. " < < endl ;
2011-06-29 20:18:55 +04:00
return false ;
}
2012-02-01 23:47:43 +04:00
// find producer for this score
string name = split_line [ 0 ] ;
for ( size_t i = 0 ; i < featureFunctions . size ( ) ; + + i ) {
std : : string prefix = featureFunctions [ i ] - > GetScoreProducerDescription ( ) ;
if ( name . substr ( 0 , prefix . length ( ) ) . compare ( prefix ) = = 0 ) {
if ( featureFunctions [ i ] - > GetNumScoreComponents ( ) = = 1 ) {
vector < float > weights ;
weights . push_back ( weight ) ;
coreWeightMap . insert ( ProducerWeightPair ( featureFunctions [ i ] , weights ) ) ;
cerr < < " insert 1 weight for " < < featureFunctions [ i ] - > GetScoreProducerDescription ( ) ;
cerr < < " ( " < < weight < < " ) " < < endl ;
}
else {
store_weights . push_back ( weight ) ;
if ( store_weights . size ( ) = = featureFunctions [ i ] - > GetNumScoreComponents ( ) ) {
coreWeightMap . insert ( ProducerWeightPair ( featureFunctions [ i ] , store_weights ) ) ;
cerr < < " insert " < < store_weights . size ( ) < < " weights for " < < featureFunctions [ i ] - > GetScoreProducerDescription ( ) < < " ( " ;
for ( size_t j = 0 ; j < store_weights . size ( ) ; + + j )
cerr < < store_weights [ j ] < < " " ;
cerr < < " ) " < < endl ;
store_weights . clear ( ) ;
}
}
}
}
2011-06-29 20:18:55 +04:00
}
return true ;
}
// Decide whether to mix/dump at this shard position.  With batching, any of
// the 'actual_batch_size' positions just processed (shard_position,
// shard_position-1, ...) hitting a multiple of 'mix_or_dump_base' triggers
// the action; a base of 0 disables it entirely.
bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size) {
  if (mix_or_dump_base == 0)
    return false;
  if (actual_batch_size <= 1)
    return (shard_position % mix_or_dump_base) == 0;
  // check each position covered by the batch
  for (size_t remaining = actual_batch_size; remaining > 0; --remaining, --shard_position) {
    if (shard_position % mix_or_dump_base == 0)
      return true;
  }
  return false;
}
void printFeatureValues ( vector < vector < ScoreComponentCollection > > & featureValues ) {
for ( size_t i = 0 ; i < featureValues . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < featureValues [ i ] . size ( ) ; + + j ) {
cerr < < featureValues [ i ] [ j ] < < endl ;
}
}
cerr < < endl ;
}
2012-02-01 23:47:43 +04:00
// Zero out, in every hypothesis' feature vector, the values of all features
// whose producers appear in 'coreWeightMap'.  The optimiser then sees zero
// feature values for these "core" features and leaves their (fixed) weights
// effectively untouched during the update.
void ignoreCoreFeatures(vector<vector<ScoreComponentCollection> > &featureValues, ProducerWeightMap &coreWeightMap) {
  for (size_t i = 0; i < featureValues.size(); ++i)
    for (size_t j = 0; j < featureValues[i].size(); ++j) {
      // set all core features to 0
      ProducerWeightMap::iterator p;
      for (p = coreWeightMap.begin(); p != coreWeightMap.end(); ++p) {
        if ((p->first)->GetNumScoreComponents() == 1)
          // single-component producer: scalar Assign overload
          featureValues[i][j].Assign(p->first, 0);
        else {
          // multi-component producer: build a zero vector of matching size
          vector<float> weights;
          for (size_t k = 0; k < (p->first)->GetNumScoreComponents(); ++k)
            weights.push_back(0);
          featureValues[i][j].Assign(p->first, weights);
        }
      }
    }
}
2011-06-29 20:18:55 +04:00
void takeLogs ( vector < vector < ScoreComponentCollection > > & featureValues , size_t base ) {
for ( size_t i = 0 ; i < featureValues . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < featureValues [ i ] . size ( ) ; + + j ) {
2011-11-25 17:56:55 +04:00
featureValues [ i ] [ j ] . LogCoreFeatures ( base ) ;
2011-06-29 20:18:55 +04:00
}
}
}
void deleteTranslations ( vector < vector < const Word * > > & translations ) {
for ( size_t i = 0 ; i < translations . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < translations [ i ] . size ( ) ; + + j ) {
delete translations [ i ] [ j ] ;
}
}
}