2010-09-15 18:36:07 +04:00
/***********************************************************************
2011-03-23 15:13:38 +03:00
Moses - factored phrase - based language decoder
Copyright ( C ) 2010 University of Edinburgh
2010-09-15 18:36:07 +04:00
2011-03-23 15:13:38 +03:00
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2.1 of the License , or ( at your option ) any later version .
2010-09-15 18:36:07 +04:00
2011-03-23 15:13:38 +03:00
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
2010-09-15 18:36:07 +04:00
2011-03-23 15:13:38 +03:00
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-09-15 18:36:07 +04:00
2010-09-28 19:13:50 +04:00
# include <algorithm>
2010-09-15 19:38:46 +04:00
# include <cstdlib>
# include <ctime>
2010-09-15 18:36:07 +04:00
# include <string>
# include <vector>
# include <boost/program_options.hpp>
2011-02-24 13:54:16 +03:00
# include <boost/algorithm/string.hpp>
2010-09-28 19:13:50 +04:00
# ifdef MPI_ENABLE
# include <boost/mpi.hpp>
namespace mpi = boost : : mpi ;
# endif
2010-09-15 18:36:07 +04:00
2010-09-15 19:38:46 +04:00
# include "FeatureVector.h"
2010-09-15 18:36:07 +04:00
# include "StaticData.h"
2010-09-16 20:23:52 +04:00
# include "ChartTrellisPathList.h"
2010-09-17 11:35:31 +04:00
# include "ChartTrellisPath.h"
# include "ScoreComponentCollection.h"
2010-09-15 18:36:07 +04:00
# include "Decoder.h"
2010-09-15 19:38:46 +04:00
# include "Optimiser.h"
2011-04-22 23:17:33 +04:00
# include "Hildreth.h"
2010-09-15 18:36:07 +04:00
using namespace Mira ;
using namespace std ;
using namespace Moses ;
namespace po = boost : : program_options ;
2011-03-23 15:13:38 +03:00
// Forward declaration only; the definition is not visible in this part of the file.
// NOTE(review): presumably emits the given chart n-best list for the sentence
// identified by translationId -- confirm against the definition.
void OutputNBestList ( const MosesChart : : TrellisPathList & nBestList ,
const TranslationSystem * system , long translationId ) ;
2010-09-16 20:23:52 +04:00
2010-09-15 19:38:46 +04:00
/**
 * Reads a text file line by line and appends every line to `sentences`.
 *
 * Entries already present in `sentences` are left untouched; new lines are
 * added after them in file order.
 *
 * @param filename  path of the file to read
 * @param sentences receives one entry per line of the file
 * @return true if the file could be opened, false otherwise
 */
bool loadSentences(const std::string& filename, std::vector<std::string>& sentences) {
  std::ifstream input(filename.c_str());
  if (input) {
    for (std::string current; std::getline(input, current); ) {
      sentences.push_back(current);
    }
    return true;
  }
  return false;
}
2011-04-10 23:48:57 +04:00
/**
 * Decides whether a mix/dump point was reached within the current batch.
 *
 * Looks at the positions shard_position, shard_position - 1, ... (as many as
 * the batch holds, at least one) and reports whether any of them is a
 * multiple of mix_or_dump_base.
 *
 * @param shard_position    position reached in the shard after the batch
 * @param mix_or_dump_base  interval to test against; 0 always yields false
 * @param actual_batch_size number of positions covered by the batch
 * @return true if one of the inspected positions is divisible by the base
 */
bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size) {
  if (mix_or_dump_base == 0) {
    return false;
  }

  // A batch of size 0 or 1 inspects exactly the current position.
  const size_t window = (actual_batch_size > 1) ? actual_batch_size : 1;

  // Walking backwards cannot wrap around: position 0 is divisible by any
  // base, so the loop returns before `back` can exceed shard_position.
  for (size_t back = 0; back < window; ++back) {
    if ((shard_position - back) % mix_or_dump_base == 0) {
      return true;
    }
  }
  return false;
}
2010-09-28 19:13:50 +04:00
/**
 * Random index generator usable as the third argument of std::random_shuffle.
 *
 * The C random number generator is seeded from the current time exactly once,
 * on first use. Re-seeding on every call would make all calls issued within
 * the same second return the same value, which defeats the shuffle.
 */
struct RandomIndex {
  /**
   * @param max exclusive upper bound of the requested index
   * @return a value in [0, max); 0 when max is not positive
   */
  std::ptrdiff_t operator()(std::ptrdiff_t max) {
    static bool seeded = false;
    if (!seeded) {
      std::srand(static_cast<unsigned int>(std::time(0)));
      seeded = true;
    }
    // Guard against modulo by zero (and negative bounds).
    if (max <= 0) {
      return 0;
    }
    return static_cast<std::ptrdiff_t>(std::rand() % max);
  }
};
2011-04-22 23:17:33 +04:00
/**
 * Reorders the input examples by moving the first element of `order` to the
 * back (a left rotation by one); all other elements keep their relative order.
 *
 * A real random shuffle is not performed here. Vectors with fewer than two
 * elements are left unchanged, so an empty `order` is safe.
 *
 * @param order     sentence indices to reorder in place
 * @param size      unused
 * @param inputSize unused
 */
void shuffleInput(std::vector<size_t>& order, size_t size, size_t inputSize) {
  std::cerr << " Shuffling input examples.. " << std::endl;

  // Kept in the signature for existing callers; not needed by the rotation.
  (void) size;
  (void) inputSize;

  if (order.size() < 2) {
    return;
  }
  std::rotate(order.begin(), order.begin() + 1, order.end());
}
/**
 * Extracts the slice of `order` that belongs to process `rank` out of `size`
 * processes and stores it in `shard`, then prints the shard to stderr.
 *
 * The boundaries are the truncated values of (order.size() / size) * rank and
 * (order.size() / size) * (rank + 1); the last rank always runs to the end of
 * `order`. `shard` is sized to exactly the copied range, since a slice can
 * hold more elements than the truncated average shard size.
 *
 * @param order sentence indices to split
 * @param size  number of processes; 0 yields an empty shard
 * @param rank  index of this process; a rank >= size yields an empty shard
 * @param shard receives the slice for `rank`; previous content is replaced
 */
void createShard(std::vector<size_t>& order, size_t size, size_t rank, std::vector<size_t>& shard) {
  size_t shardStart = 0;
  size_t shardEnd = 0;
  if (size > 0 && rank < size) {
    const float shardSize = static_cast<float>(order.size()) / size;
    shardStart = static_cast<size_t>(shardSize * rank);
    shardEnd = (rank == size - 1) ? order.size()
                                  : static_cast<size_t>(shardSize * (rank + 1));
    // Keep both boundaries inside `order` even if float rounding overshoots.
    shardEnd = std::min(shardEnd, order.size());
    shardStart = std::min(shardStart, shardEnd);
  }

  shard.assign(order.begin() + shardStart, order.begin() + shardEnd);

  std::cerr << " order: ";
  for (size_t i = 0; i < shard.size(); ++i) {
    std::cerr << shard[i] << " ";
  }
  std::cerr << std::endl;
}
2010-09-15 18:36:07 +04:00
int main ( int argc , char * * argv ) {
2011-03-23 15:13:38 +03:00
size_t rank = 0 ;
size_t size = 1 ;
2010-09-28 19:13:50 +04:00
# ifdef MPI_ENABLE
2011-03-23 15:13:38 +03:00
mpi : : environment env ( argc , argv ) ;
mpi : : communicator world ;
rank = world . rank ( ) ;
size = world . size ( ) ;
2010-09-28 19:13:50 +04:00
# endif
2011-03-23 15:13:38 +03:00
cerr < < " Rank: " < < rank < < " Size: " < < size < < endl ;
bool help ;
int verbosity ;
string mosesConfigFile ;
string inputFile ;
vector < string > referenceFiles ;
size_t epochs ;
string learner ;
bool shuffle ;
bool hildreth ;
size_t mixingFrequency ;
size_t weightDumpFrequency ;
string weightDumpStem ;
float marginScaleFactor ;
float marginScaleFactorStep ;
float marginScaleFactorMin ;
float min_learning_rate ;
float min_sentence_update ;
2011-04-26 23:35:06 +04:00
size_t weightedLossFunction ;
2011-03-23 15:13:38 +03:00
size_t n ;
2011-05-01 18:17:40 +04:00
size_t nbest_first ;
2011-03-23 15:13:38 +03:00
size_t batchSize ;
bool distinctNbest ;
bool onlyViolatedConstraints ;
bool accumulateWeights ;
float historySmoothing ;
bool useScaledReference ;
bool scaleByInputLength ;
float BPfactor ;
bool adapt_BPfactor ;
float slack ;
float slack_step ;
2011-05-09 13:39:57 +04:00
float slack_min ;
2011-03-23 15:13:38 +03:00
size_t maxNumberOracles ;
bool accumulateMostViolatedConstraints ;
2011-04-15 15:34:51 +04:00
bool averageWeights ;
2011-03-23 15:13:38 +03:00
bool pastAndCurrentConstraints ;
bool weightConvergence ;
bool controlUpdates ;
float learning_rate ;
bool logFeatureValues ;
size_t baseOfLog ;
string decoder_settings ;
float min_weight_change ;
float max_sentence_update ;
float decrease_learning_rate ;
float decrease_sentence_update ;
bool devBleu ;
bool normaliseWeights ;
bool print_feature_values ;
bool stop_dev_bleu ;
bool stop_approx_dev_bleu ;
2011-04-22 23:17:33 +04:00
bool train_linear_classifier ;
2011-04-15 15:34:51 +04:00
int updates_per_epoch ;
2011-04-22 23:17:33 +04:00
bool multiplyA ;
2011-04-26 23:35:06 +04:00
bool historyOf1best ;
bool burnIn ;
string burnInInputFile ;
vector < string > burnInReferenceFiles ;
bool sentenceLevelBleu ;
float bleuScoreWeight ;
2011-05-04 22:26:20 +04:00
float precision ;
2011-05-09 13:39:57 +04:00
float min_bleu_change ;
bool analytical_update ;
2011-05-10 21:17:19 +04:00
bool perceptron_update ;
2011-05-12 18:21:03 +04:00
bool hope_fear ;
2011-05-09 13:39:57 +04:00
size_t constraints ;
2011-03-23 15:13:38 +03:00
po : : options_description desc ( " Allowed options " ) ;
2011-04-26 23:35:06 +04:00
desc . add_options ( )
( " accumulate-most-violated-constraints " , po : : value < bool > ( & accumulateMostViolatedConstraints ) - > default_value ( false ) , " Accumulate most violated constraint per example " )
2011-04-08 16:52:14 +04:00
( " accumulate-weights " , po : : value < bool > ( & accumulateWeights ) - > default_value ( false ) , " Accumulate and average weights over all epochs " )
2011-03-23 15:24:52 +03:00
( " adapt-BP-factor " , po : : value < bool > ( & adapt_BPfactor ) - > default_value ( 0 ) , " Set factor to 1 when optimal translation length in reached " )
2011-05-09 13:39:57 +04:00
( " analytical-update " , po : : value < bool > ( & analytical_update ) - > default_value ( 0 ) , " Use one best lists and compute the update analytically " )
2011-04-08 17:11:35 +04:00
( " average-weights " , po : : value < bool > ( & averageWeights ) - > default_value ( false ) , " Set decoder weights to average weights after each update " )
2011-03-23 15:24:52 +03:00
( " base-of-log " , po : : value < size_t > ( & baseOfLog ) - > default_value ( 10 ) , " Base for log-ing feature values " )
( " batch-size,b " , po : : value < size_t > ( & batchSize ) - > default_value ( 1 ) , " Size of batch that is send to optimiser for weight adjustments " )
2011-04-26 23:35:06 +04:00
( " bleu-score-weight " , po : : value < float > ( & bleuScoreWeight ) - > default_value ( 1.0 ) , " Bleu score weight used in the decoder objective function (on top of the bleu objective weight) " )
2011-03-23 15:24:52 +03:00
( " BP-factor " , po : : value < float > ( & BPfactor ) - > default_value ( 1.0 ) , " Increase penalty for short translations " )
2011-04-26 23:35:06 +04:00
( " burn-in " , po : : value < bool > ( & burnIn ) - > default_value ( false ) , " Do a burn-in of the BLEU history before training " )
( " burn-in-input-file " , po : : value < string > ( & burnInInputFile ) , " Input file for burn-in phase of BLEU history " )
( " burn-in-reference-files " , po : : value < vector < string > > ( & burnInReferenceFiles ) , " Reference file for burn-in phase of BLEU history " )
2011-03-23 15:24:52 +03:00
( " config,f " , po : : value < string > ( & mosesConfigFile ) , " Moses ini file " )
2011-05-09 13:39:57 +04:00
( " constraints " , po : : value < size_t > ( & constraints ) - > default_value ( 1 ) , " Number of constraints used for analytical update " )
2011-04-08 16:52:14 +04:00
( " control-updates " , po : : value < bool > ( & controlUpdates ) - > default_value ( true ) , " Ignore updates that increase number of violated constraints AND increase the error " )
2011-03-23 15:24:52 +03:00
( " decoder-settings " , po : : value < string > ( & decoder_settings ) - > default_value ( " " ) , " Decoder settings for tuning runs " )
( " decr-learning-rate " , po : : value < float > ( & decrease_learning_rate ) - > default_value ( 0 ) , " Decrease learning rate by the given value after every epoch " )
( " decr-sentence-update " , po : : value < float > ( & decrease_sentence_update ) - > default_value ( 0 ) , " Decrease maximum weight update by the given value after every epoch " )
( " dev-bleu " , po : : value < bool > ( & devBleu ) - > default_value ( true ) , " Compute BLEU score of oracle translations of the whole tuning set " )
2011-04-08 16:52:14 +04:00
( " distinct-nbest " , po : : value < bool > ( & distinctNbest ) - > default_value ( true ) , " Use nbest list with distinct translations in inference step " )
2011-03-28 22:11:45 +04:00
( " weight-dump-frequency " , po : : value < size_t > ( & weightDumpFrequency ) - > default_value ( 1 ) , " How often per epoch to dump weights, when using mpi " )
2011-03-23 15:24:52 +03:00
( " epochs,e " , po : : value < size_t > ( & epochs ) - > default_value ( 5 ) , " Number of epochs " )
( " help " , po : : value ( & help ) - > zero_tokens ( ) - > default_value ( false ) , " Print this help message and exit " )
( " hildreth " , po : : value < bool > ( & hildreth ) - > default_value ( true ) , " Use Hildreth's optimisation algorithm " )
2011-04-26 23:35:06 +04:00
( " history-of-1best " , po : : value < bool > ( & historyOf1best ) - > default_value ( 0 ) , " Use the 1best translation to update the history " )
2011-03-23 15:24:52 +03:00
( " history-smoothing " , po : : value < float > ( & historySmoothing ) - > default_value ( 0.9 ) , " Adjust the factor for history smoothing " )
2011-05-12 18:21:03 +04:00
( " hope-fear " , po : : value < bool > ( & hope_fear ) - > default_value ( true ) , " Use only hope and fear translations (not model) " )
2011-03-23 15:24:52 +03:00
( " input-file,i " , po : : value < string > ( & inputFile ) , " Input file containing tokenised source " )
( " learner,l " , po : : value < string > ( & learner ) - > default_value ( " mira " ) , " Learning algorithm " )
( " learning-rate " , po : : value < float > ( & learning_rate ) - > default_value ( 1 ) , " Learning rate (fixed or flexible) " )
( " log-feature-values " , po : : value < bool > ( & logFeatureValues ) - > default_value ( false ) , " Take log of feature values according to the given base. " )
( " max-number-oracles " , po : : value < size_t > ( & maxNumberOracles ) - > default_value ( 1 ) , " Set a maximum number of oracles to use per example " )
2011-05-09 13:39:57 +04:00
( " min-bleu-change " , po : : value < float > ( & min_bleu_change ) - > default_value ( 0 ) , " Minimum BLEU change of 1best translations of one epoch " )
2011-03-23 15:24:52 +03:00
( " min-sentence-update " , po : : value < float > ( & min_sentence_update ) - > default_value ( 0 ) , " Set a minimum weight update per sentence " )
( " min-learning-rate " , po : : value < float > ( & min_learning_rate ) - > default_value ( 0 ) , " Set a minimum learning rate " )
( " max-sentence-update " , po : : value < float > ( & max_sentence_update ) - > default_value ( - 1 ) , " Set a maximum weight update per sentence " )
( " min-weight-change " , po : : value < float > ( & min_weight_change ) - > default_value ( 0.01 ) , " Set minimum weight change for stopping criterion " )
( " mixing-frequency " , po : : value < size_t > ( & mixingFrequency ) - > default_value ( 1 ) , " How often per epoch to mix weights, when using mpi " )
( " msf " , po : : value < float > ( & marginScaleFactor ) - > default_value ( 1.0 ) , " Margin scale factor, regularises the update by scaling the enforced margin " )
( " msf-min " , po : : value < float > ( & marginScaleFactorMin ) - > default_value ( 1.0 ) , " Minimum value that margin is scaled by " )
( " msf-step " , po : : value < float > ( & marginScaleFactorStep ) - > default_value ( 0 ) , " Decrease margin scale factor iteratively by the value provided " )
2011-04-26 23:35:06 +04:00
( " multiplyA " , po : : value < bool > ( & multiplyA ) - > default_value ( true ) , " Multiply A with outcome before passing to Hildreth " )
( " nbest,n " , po : : value < size_t > ( & n ) - > default_value ( 10 ) , " Number of translations in nbest list " )
2011-05-01 18:17:40 +04:00
( " nbest-first " , po : : value < size_t > ( & nbest_first ) - > default_value ( 0 ) , " Number of translations in nbest list in the first epoch " )
2011-03-23 15:24:52 +03:00
( " normalise " , po : : value < bool > ( & normaliseWeights ) - > default_value ( false ) , " Whether to normalise the updated weights before passing them to the decoder " )
2011-04-15 15:34:51 +04:00
( " only-violated-constraints " , po : : value < bool > ( & onlyViolatedConstraints ) - > default_value ( false ) , " Add only violated constraints to the optimisation problem " )
2011-03-23 15:24:52 +03:00
( " past-and-current-constraints " , po : : value < bool > ( & pastAndCurrentConstraints ) - > default_value ( false ) , " Accumulate most violated constraint per example and use them along all current constraints " )
2011-05-10 21:17:19 +04:00
( " perceptron-update " , po : : value < bool > ( & perceptron_update ) - > default_value ( false ) , " Do a simple perceptron style update " )
2011-05-12 18:21:03 +04:00
( " precision " , po : : value < float > ( & precision ) - > default_value ( 0 ) , " Precision when comparing left and right hand side of constraints " )
2011-03-23 15:24:52 +03:00
( " print-feature-values " , po : : value < bool > ( & print_feature_values ) - > default_value ( false ) , " Print out feature values " )
( " reference-files,r " , po : : value < vector < string > > ( & referenceFiles ) , " Reference translation files for training " )
( " scale-by-input-length " , po : : value < bool > ( & scaleByInputLength ) - > default_value ( true ) , " Scale the BLEU score by a history of the input lengths " )
2011-04-26 23:35:06 +04:00
( " sentence-level-bleu " , po : : value < bool > ( & sentenceLevelBleu ) - > default_value ( false ) , " Use a sentences level bleu scoring function " )
2011-03-23 15:24:52 +03:00
( " shuffle " , po : : value < bool > ( & shuffle ) - > default_value ( false ) , " Shuffle input sentences before processing " )
( " slack " , po : : value < float > ( & slack ) - > default_value ( 0.01 ) , " Use slack in optimizer " )
2011-05-09 13:39:57 +04:00
( " slack-min " , po : : value < float > ( & slack_min ) - > default_value ( 0.01 ) , " Minimum slack used " )
2011-03-23 15:24:52 +03:00
( " slack-step " , po : : value < float > ( & slack_step ) - > default_value ( 0 ) , " Increase slack from epoch to epoch by the value provided " )
2011-04-08 14:59:41 +04:00
( " stop-dev-bleu " , po : : value < bool > ( & stop_dev_bleu ) - > default_value ( false ) , " Stop when average Bleu (dev) decreases (or no more increases) " )
( " stop-approx-dev-bleu " , po : : value < bool > ( & stop_approx_dev_bleu ) - > default_value ( false ) , " Stop when average approx. sentence Bleu (dev) decreases (or no more increases) " )
2011-05-10 21:25:46 +04:00
( " stop-weights " , po : : value < bool > ( & weightConvergence ) - > default_value ( true ) , " Stop when weights converge " )
2011-05-09 13:39:57 +04:00
( " train-linear-classifier " , po : : value < bool > ( & train_linear_classifier ) - > default_value ( false ) , " Test algorithm for linear classification " )
2011-03-28 22:11:45 +04:00
( " updates-per-epoch " , po : : value < int > ( & updates_per_epoch ) - > default_value ( - 1 ) , " Accumulate updates and apply them to the weight vector the specified number of times per epoch " )
2011-03-23 15:24:52 +03:00
( " use-scaled-reference " , po : : value < bool > ( & useScaledReference ) - > default_value ( true ) , " Use scaled reference length for comparing target and reference length of phrases " )
( " verbosity,v " , po : : value < int > ( & verbosity ) - > default_value ( 0 ) , " Verbosity level " )
2011-04-26 23:35:06 +04:00
( " weighted-loss-function " , po : : value < size_t > ( & weightedLossFunction ) - > default_value ( 0 ) , " Weight the loss of a hypothesis by its Bleu score " )
2011-03-23 15:24:52 +03:00
( " weight-dump-stem " , po : : value < string > ( & weightDumpStem ) - > default_value ( " weights " ) , " Stem of filename to use for dumping weights " ) ;
2011-03-23 15:13:38 +03:00
po : : options_description cmdline_options ;
cmdline_options . add ( desc ) ;
po : : variables_map vm ;
po : : store (
po : : command_line_parser ( argc , argv ) . options ( cmdline_options ) . run ( ) , vm ) ;
po : : notify ( vm ) ;
if ( help ) {
std : : cout < < " Usage: " + string ( argv [ 0 ] )
+ " -f mosesini-file -i input-file -r reference-file(s) [options] "
< < std : : endl ;
std : : cout < < desc < < std : : endl ;
return 0 ;
}
if ( mosesConfigFile . empty ( ) ) {
cerr < < " Error: No moses ini file specified " < < endl ;
return 1 ;
}
if ( inputFile . empty ( ) ) {
cerr < < " Error: No input file specified " < < endl ;
return 1 ;
}
if ( ! referenceFiles . size ( ) ) {
cerr < < " Error: No reference files specified " < < endl ;
return 1 ;
}
2011-04-22 23:17:33 +04:00
if ( accumulateMostViolatedConstraints & & pastAndCurrentConstraints ) {
cerr < < " Error: the parameters --accumulate-most-violated-constraints and --past-and-current-constraints are mutually exclusive " < < endl ;
return 1 ;
}
2011-05-01 18:17:40 +04:00
if ( nbest_first = = 0 ) {
nbest_first = n ;
}
2011-03-23 15:13:38 +03:00
// load input and references
vector < string > inputSentences ;
if ( ! loadSentences ( inputFile , inputSentences ) ) {
cerr < < " Error: Failed to load input sentences from " < < inputFile < < endl ;
return 1 ;
}
vector < vector < string > > referenceSentences ( referenceFiles . size ( ) ) ;
for ( size_t i = 0 ; i < referenceFiles . size ( ) ; + + i ) {
if ( ! loadSentences ( referenceFiles [ i ] , referenceSentences [ i ] ) ) {
cerr < < " Error: Failed to load reference sentences from "
< < referenceFiles [ i ] < < endl ;
return 1 ;
}
if ( referenceSentences [ i ] . size ( ) ! = inputSentences . size ( ) ) {
cerr < < " Error: Input file length ( " < < inputSentences . size ( ) < < " ) != ( "
< < referenceSentences [ i ] . size ( ) < < " ) length of reference file " < < i
< < endl ;
return 1 ;
}
}
// initialise Moses
vector < string > decoder_params ;
boost : : split ( decoder_params , decoder_settings , boost : : is_any_of ( " \t " ) ) ;
initMoses ( mosesConfigFile , verbosity , decoder_params . size ( ) , decoder_params ) ;
2011-04-26 23:35:06 +04:00
MosesDecoder * decoder = new MosesDecoder ( useScaledReference , scaleByInputLength , BPfactor , historySmoothing ) ;
2011-03-23 15:13:38 +03:00
if ( normaliseWeights ) {
ScoreComponentCollection startWeights = decoder - > getWeights ( ) ;
startWeights . L1Normalise ( ) ;
decoder - > setWeights ( startWeights ) ;
}
2011-04-26 23:35:06 +04:00
if ( sentenceLevelBleu ) {
burnIn = false ;
2011-05-09 13:39:57 +04:00
cerr < < " Burn-in not needed when using sentence-level BLEU, deactivating burn-in. " < < endl ;
}
2011-05-10 21:17:19 +04:00
if ( perceptron_update | | analytical_update ) {
2011-05-09 13:39:57 +04:00
batchSize = 1 ;
2011-05-10 21:17:19 +04:00
cerr < < " Setting batch size to 1 for perceptron/analytical update " < < endl ;
2011-04-26 23:35:06 +04:00
}
if ( burnIn ) {
// load burn-in input and references
vector < string > burnInInputSentences ;
if ( ! loadSentences ( burnInInputFile , burnInInputSentences ) ) {
cerr < < " Error: Failed to load burn-in input sentences from " < < burnInInputFile < < endl ;
return 1 ;
}
vector < vector < string > > burnInReferenceSentences ( burnInReferenceFiles . size ( ) ) ;
for ( size_t i = 0 ; i < burnInReferenceFiles . size ( ) ; + + i ) {
if ( ! loadSentences ( burnInReferenceFiles [ i ] , burnInReferenceSentences [ i ] ) ) {
cerr < < " Error: Failed to load burn-in reference sentences from "
< < burnInReferenceFiles [ i ] < < endl ;
return 1 ;
}
if ( burnInReferenceSentences [ i ] . size ( ) ! = burnInInputSentences . size ( ) ) {
cerr < < " Error: Burn-in input file length ( " < < burnInInputSentences . size ( ) < < " ) != ( "
< < burnInReferenceSentences [ i ] . size ( ) < < " ) length of burn-in reference file " < < i
< < endl ;
return 1 ;
}
}
decoder - > loadReferenceSentences ( burnInReferenceSentences ) ;
vector < size_t > inputLengths ;
vector < size_t > ref_ids ;
vector < vector < const Word * > > oracles ;
vector < vector < const Word * > > oneBests ;
vector < vector < ScoreComponentCollection > > featureValues ;
vector < vector < float > > bleuScores ;
vector < ScoreComponentCollection > newFeatureValues ;
vector < float > newBleuScores ;
featureValues . push_back ( newFeatureValues ) ;
bleuScores . push_back ( newBleuScores ) ;
vector < size_t > order ;
for ( size_t i = 0 ; i < burnInInputSentences . size ( ) ; + + i ) {
order . push_back ( i ) ;
}
2011-05-09 13:39:57 +04:00
cerr < < " Rank " < < rank < < " , starting burn-in phase for approx. BLEU history.. " < < endl ;
2011-04-26 23:35:06 +04:00
if ( historyOf1best ) {
// get 1best translations for the burn-in sentences
vector < size_t > : : const_iterator sid = order . begin ( ) ;
while ( sid ! = order . end ( ) ) {
string & input = burnInInputSentences [ * sid ] ;
vector < const Word * > bestModel = decoder - > getNBest ( input , * sid , 1 , 0.0 , bleuScoreWeight ,
featureValues [ 0 ] , bleuScores [ 0 ] , true ,
distinctNbest , rank ) ;
inputLengths . push_back ( decoder - > getCurrentInputLength ( ) ) ;
ref_ids . push_back ( * sid ) ;
decoder - > cleanup ( ) ;
oneBests . push_back ( bestModel ) ;
+ + sid ;
}
// update history
decoder - > updateHistory ( oneBests , inputLengths , ref_ids , rank , 0 ) ;
// clean up 1best translations after updating history
for ( size_t i = 0 ; i < oracles . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < oracles [ i ] . size ( ) ; + + j ) {
delete oracles [ i ] [ j ] ;
}
}
}
else {
// get oracle translations for the burn-in sentences
vector < size_t > : : const_iterator sid = order . begin ( ) ;
while ( sid ! = order . end ( ) ) {
string & input = burnInInputSentences [ * sid ] ;
vector < const Word * > oracle = decoder - > getNBest ( input , * sid , 1 , 1.0 , bleuScoreWeight ,
featureValues [ 0 ] , bleuScores [ 0 ] , true ,
distinctNbest , rank ) ;
inputLengths . push_back ( decoder - > getCurrentInputLength ( ) ) ;
ref_ids . push_back ( * sid ) ;
decoder - > cleanup ( ) ;
oracles . push_back ( oracle ) ;
+ + sid ;
}
// update history
decoder - > updateHistory ( oracles , inputLengths , ref_ids , rank , 0 ) ;
// clean up oracle translations after updating history
for ( size_t i = 0 ; i < oracles . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < oracles [ i ] . size ( ) ; + + j ) {
delete oracles [ i ] [ j ] ;
}
}
}
cerr < < " Bleu feature history after burn-in: " < < endl ;
decoder - > printBleuFeatureHistory ( cerr ) ;
decoder - > loadReferenceSentences ( referenceSentences ) ;
}
else {
decoder - > loadReferenceSentences ( referenceSentences ) ;
}
2011-03-23 15:13:38 +03:00
// Optionally shuffle the sentences
vector < size_t > order ;
if ( rank = = 0 ) {
for ( size_t i = 0 ; i < inputSentences . size ( ) ; + + i ) {
order . push_back ( i ) ;
}
if ( shuffle ) {
cerr < < " Shuffling input sentences.. " < < endl ;
RandomIndex rindex ;
random_shuffle ( order . begin ( ) , order . end ( ) , rindex ) ;
}
}
2010-09-28 19:13:50 +04:00
# ifdef MPI_ENABLE
2011-03-23 15:13:38 +03:00
mpi : : broadcast ( world , order , 0 ) ;
2010-09-28 19:13:50 +04:00
# endif
2011-03-23 15:13:38 +03:00
// Create the shards according to the number of processes used
vector < size_t > shard ;
float shardSize = ( float ) ( order . size ( ) ) / size ;
VERBOSE ( 1 , " Shard size: " < < shardSize < < endl ) ;
size_t shardStart = ( size_t ) ( shardSize * rank ) ;
size_t shardEnd = ( size_t ) ( shardSize * ( rank + 1 ) ) ;
if ( rank = = size - 1 )
shardEnd = order . size ( ) ;
VERBOSE ( 1 , " Rank: " < < rank < < " Shard start: " < < shardStart < < " Shard end: " < < shardEnd < < endl ) ;
shard . resize ( shardSize ) ;
copy ( order . begin ( ) + shardStart , order . begin ( ) + shardEnd , shard . begin ( ) ) ;
Optimiser * optimiser = NULL ;
cerr < < " adapt-BP-factor: " < < adapt_BPfactor < < endl ;
2011-04-08 14:59:41 +04:00
cerr < < " control-updates: " < < controlUpdates < < endl ;
2011-03-23 15:13:38 +03:00
cerr < < " mix-frequency: " < < mixingFrequency < < endl ;
2011-03-28 22:11:45 +04:00
cerr < < " weight-dump-frequency: " < < weightDumpFrequency < < endl ;
2011-03-23 15:13:38 +03:00
cerr < < " weight-dump-stem: " < < weightDumpStem < < endl ;
cerr < < " shuffle: " < < shuffle < < endl ;
cerr < < " hildreth: " < < hildreth < < endl ;
cerr < < " msf: " < < marginScaleFactor < < endl ;
cerr < < " msf-step: " < < marginScaleFactorStep < < endl ;
cerr < < " msf-min: " < < marginScaleFactorMin < < endl ;
cerr < < " weighted-loss-function: " < < weightedLossFunction < < endl ;
cerr < < " nbest: " < < n < < endl ;
2011-05-01 18:17:40 +04:00
cerr < < " nbest-first: " < < nbest_first < < endl ;
2011-03-23 15:13:38 +03:00
cerr < < " batch-size: " < < batchSize < < endl ;
cerr < < " distinct-nbest: " < < distinctNbest < < endl ;
cerr < < " only-violated-constraints: " < < onlyViolatedConstraints < < endl ;
cerr < < " accumulate-weights: " < < accumulateWeights < < endl ;
cerr < < " history-smoothing: " < < historySmoothing < < endl ;
cerr < < " use-scaled-reference: " < < useScaledReference < < endl ;
cerr < < " scale-by-input-length: " < < scaleByInputLength < < endl ;
cerr < < " BP-factor: " < < BPfactor < < endl ;
cerr < < " slack: " < < slack < < endl ;
cerr < < " slack-step: " < < slack_step < < endl ;
2011-05-09 13:39:57 +04:00
cerr < < " slack-min: " < < slack_min < < endl ;
2011-03-23 15:13:38 +03:00
cerr < < " max-number-oracles: " < < maxNumberOracles < < endl ;
2011-05-12 18:21:03 +04:00
cerr < < " accumulate-most-violated-constraints: " < < accumulateMostViolatedConstraints < < endl ;
2011-03-23 15:13:38 +03:00
cerr < < " past-and-current-constraints: " < < pastAndCurrentConstraints < < endl ;
cerr < < " log-feature-values: " < < logFeatureValues < < endl ;
cerr < < " base-of-log: " < < baseOfLog < < endl ;
cerr < < " decoder-settings: " < < decoder_settings < < endl ;
cerr < < " min-weight-change: " < < min_weight_change < < endl ;
cerr < < " max-sentence-update: " < < max_sentence_update < < endl ;
cerr < < " decr-learning-rate: " < < decrease_learning_rate < < endl ;
cerr < < " dev-bleu: " < < devBleu < < endl ;
cerr < < " normalise: " < < normaliseWeights < < endl ;
cerr < < " print-feature-values: " < < print_feature_values < < endl ;
cerr < < " stop-dev-bleu: " < < stop_dev_bleu < < endl ;
cerr < < " stop-approx-dev-bleu: " < < stop_approx_dev_bleu < < endl ;
cerr < < " stop-weights: " < < weightConvergence < < endl ;
2011-03-28 22:11:45 +04:00
cerr < < " updates-per-epoch: " < < updates_per_epoch < < endl ;
2011-05-09 13:39:57 +04:00
cerr < < " average-weights: " < < averageWeights < < endl ;
cerr < < " history-of-1best: " < < historyOf1best < < endl ;
cerr < < " sentence-level-bleu: " < < sentenceLevelBleu < < endl ;
cerr < < " bleu-score-weight: " < < bleuScoreWeight < < endl ;
cerr < < " precision: " < < precision < < endl ;
cerr < < " min-bleu-change: " < < min_bleu_change < < endl ;
2011-05-10 21:17:19 +04:00
cerr < < " perceptron-update: " < < perceptron_update < < endl ;
cerr < < " analytical-update: " < < analytical_update < < endl ;
2011-05-12 18:21:03 +04:00
cerr < < " hope-fear: " < < hope_fear < < endl ;
2011-03-23 21:42:18 +03:00
2011-03-23 15:13:38 +03:00
if ( learner = = " mira " ) {
cerr < < " Optimising using Mira " < < endl ;
optimiser = new MiraOptimiser ( n , hildreth , marginScaleFactor ,
onlyViolatedConstraints , slack , weightedLossFunction , maxNumberOracles ,
2011-05-04 22:26:20 +04:00
accumulateMostViolatedConstraints , pastAndCurrentConstraints , order . size ( ) , precision ) ;
2011-03-23 15:13:38 +03:00
if ( hildreth ) {
cerr < < " Using Hildreth's optimisation algorithm.. " < < endl ;
}
} else if ( learner = = " perceptron " ) {
cerr < < " Optimising using Perceptron " < < endl ;
optimiser = new Perceptron ( ) ;
} else {
cerr < < " Error: Unknown optimiser: " < < learner < < endl ;
}
//Main loop:
2011-03-28 22:11:45 +04:00
// print initial weights
2011-03-29 21:08:07 +04:00
cerr < < " Rank " < < rank < < " , initial weights: " < < decoder - > getWeights ( ) < < endl ;
ScoreComponentCollection cumulativeWeights ; // collect weights per epoch to produce an average
2011-04-09 01:04:08 +04:00
size_t numberOfUpdates = 0 ;
size_t numberOfUpdatesThisEpoch = 0 ;
2011-03-23 15:13:38 +03:00
time_t now = time ( 0 ) ; // get current time
struct tm * tm = localtime ( & now ) ; // get struct filled out
cerr < < " Start date/time: " < < tm - > tm_mon + 1 < < " / " < < tm - > tm_mday < < " / "
< < tm - > tm_year + 1900 < < " , " < < tm - > tm_hour < < " : " < < tm - > tm_min < < " : "
< < tm - > tm_sec < < endl ;
2011-03-28 22:11:45 +04:00
ScoreComponentCollection mixedAverageWeights ;
ScoreComponentCollection mixedAverageWeightsPrevious ;
ScoreComponentCollection mixedAverageWeightsBeforePrevious ;
2011-03-23 15:13:38 +03:00
2011-05-09 13:39:57 +04:00
/* float averageRatio = 0;
2011-03-23 15:13:38 +03:00
float averageBleu = 0 ;
float prevAverageBleu = 0 ;
float beforePrevAverageBleu = 0 ;
float summedApproxBleu = 0 ;
float averageApproxBleu = 0 ;
float prevAverageApproxBleu = 0 ;
2011-05-09 13:39:57 +04:00
float beforePrevAverageApproxBleu = 0 ; */
2011-03-23 15:13:38 +03:00
bool stop = false ;
2011-05-09 13:39:57 +04:00
int sumStillViolatedConstraints ;
int sumStillViolatedConstraints_lastEpoch = 0 ;
int sumConstraintChangeAbs ;
int sumConstraintChangeAbs_lastEpoch = 0 ;
size_t sumBleuChangeAbs ;
2011-03-23 15:13:38 +03:00
float * sendbuf , * recvbuf ;
sendbuf = ( float * ) malloc ( sizeof ( float ) ) ;
recvbuf = ( float * ) malloc ( sizeof ( float ) ) ;
2011-03-29 21:08:07 +04:00
// Note: make sure that the variable mosesWeights always holds the current decoder weights
2011-03-23 15:13:38 +03:00
for ( size_t epoch = 0 ; epoch < epochs & & ! stop ; + + epoch ) {
2011-04-08 16:45:01 +04:00
cerr < < " \n Rank " < < rank < < " , epoch " < < epoch < < endl ;
2011-04-09 01:04:08 +04:00
2011-04-11 02:07:41 +04:00
// sum of violated constraints
sumStillViolatedConstraints = 0 ;
sumConstraintChangeAbs = 0 ;
2011-05-09 13:39:57 +04:00
sumBleuChangeAbs = 0 ;
2011-04-09 01:04:08 +04:00
// sum of approx. sentence bleu scores per epoch
2011-05-09 13:39:57 +04:00
// summedApproxBleu = 0;
2011-04-09 01:04:08 +04:00
numberOfUpdatesThisEpoch = 0 ;
2011-03-23 15:13:38 +03:00
// Sum up weights over one epoch, final average uses weights from last epoch
if ( ! accumulateWeights ) {
cumulativeWeights . ZeroAll ( ) ;
}
// number of weight dumps this epoch
size_t weightEpochDump = 0 ;
// collect best model score translations for computing bleu on dev set
vector < vector < const Word * > > allBestModelScore ;
vector < size_t > all_ref_ids ;
size_t shardPosition = 0 ;
vector < size_t > : : const_iterator sid = shard . begin ( ) ;
while ( sid ! = shard . end ( ) ) {
// feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues)
vector < vector < ScoreComponentCollection > > featureValues ;
2011-05-12 15:05:48 +04:00
vector < vector < ScoreComponentCollection > > dummyFeatureValues ;
2011-03-23 15:13:38 +03:00
vector < vector < float > > bleuScores ;
2011-05-12 15:05:48 +04:00
vector < vector < float > > dummyBleuScores ;
2011-03-23 15:13:38 +03:00
// get moses weights
ScoreComponentCollection mosesWeights = decoder - > getWeights ( ) ;
2011-04-08 16:45:01 +04:00
cerr < < " \n Rank " < < rank < < " , next batch " < < endl ;
2011-04-08 14:59:41 +04:00
cerr < < " Rank " < < rank < < " , weights: " < < mosesWeights < < endl ;
2011-03-23 15:13:38 +03:00
// BATCHING: produce nbest lists for all input sentences in batch
vector < float > oracleBleuScores ;
vector < vector < const Word * > > oracles ;
2011-04-26 23:35:06 +04:00
vector < vector < const Word * > > oneBests ;
2011-03-23 15:13:38 +03:00
vector < ScoreComponentCollection > oracleFeatureValues ;
vector < size_t > inputLengths ;
vector < size_t > ref_ids ;
size_t actualBatchSize = 0 ;
2011-04-04 17:03:27 +04:00
2011-04-11 16:22:19 +04:00
vector < size_t > : : const_iterator current_sid_start = sid ;
2011-03-23 15:13:38 +03:00
for ( size_t batchPosition = 0 ; batchPosition < batchSize & & sid
! = shard . end ( ) ; + + batchPosition ) {
2011-04-04 17:03:27 +04:00
string & input = inputSentences [ * sid ] ;
2011-03-23 15:13:38 +03:00
const vector < string > & refs = referenceSentences [ * sid ] ;
cerr < < " Rank " < < rank < < " , batch position " < < batchPosition < < endl ;
2011-03-28 22:11:45 +04:00
cerr < < " Rank " < < rank < < " , input sentence " < < * sid < < " : \" " < < input < < " \" " < < endl ;
2011-03-23 15:13:38 +03:00
vector < ScoreComponentCollection > newFeatureValues ;
vector < float > newBleuScores ;
featureValues . push_back ( newFeatureValues ) ;
2011-05-12 15:05:48 +04:00
dummyFeatureValues . push_back ( newFeatureValues ) ;
2011-03-23 15:13:38 +03:00
bleuScores . push_back ( newBleuScores ) ;
2011-05-12 15:05:48 +04:00
dummyBleuScores . push_back ( newBleuScores ) ;
2011-03-23 15:13:38 +03:00
2011-05-01 18:17:40 +04:00
size_t pass_n = ( epoch = = 0 ) ? nbest_first : n ;
2011-05-10 21:17:19 +04:00
if ( perceptron_update | | analytical_update ) {
2011-05-09 13:39:57 +04:00
if ( constraints = = 1 ) {
2011-05-11 19:12:05 +04:00
if ( historyOf1best ) {
// MODEL (for updating the history)
cerr < < " Rank " < < rank < < " , run decoder to get " < < 1 < < " best wrt model score " < < endl ;
vector < const Word * > bestModel = decoder - > getNBest ( input , * sid , 1 , 0.0 , bleuScoreWeight ,
2011-05-12 15:05:48 +04:00
dummyFeatureValues [ batchPosition ] , dummyBleuScores [ batchPosition ] , true ,
2011-05-11 19:12:05 +04:00
distinctNbest , rank ) ;
decoder - > cleanup ( ) ;
oneBests . push_back ( bestModel ) ;
2011-05-12 15:05:48 +04:00
cerr < < " Rank " < < rank < < " , model length: " < < bestModel . size ( ) < < " Bleu: " < < dummyBleuScores [ batchPosition ] [ 0 ] < < endl ;
2011-05-11 19:12:05 +04:00
}
2011-05-09 13:39:57 +04:00
// HOPE
2011-05-10 21:17:19 +04:00
cerr < < " Rank " < < rank < < " , run decoder to get 1best hope translations " < < endl ;
2011-05-12 15:05:48 +04:00
size_t oraclePos = dummyFeatureValues [ batchPosition ] . size ( ) ;
2011-05-09 13:39:57 +04:00
vector < const Word * > oracle = decoder - > getNBest ( input , * sid , 1 , 1.0 , bleuScoreWeight ,
2011-05-12 15:05:48 +04:00
dummyFeatureValues [ batchPosition ] , dummyBleuScores [ batchPosition ] , true ,
2011-05-09 13:39:57 +04:00
distinctNbest , rank ) ;
2011-05-11 19:12:05 +04:00
// needed for history
2011-05-10 21:17:19 +04:00
inputLengths . push_back ( decoder - > getCurrentInputLength ( ) ) ;
ref_ids . push_back ( * sid ) ;
2011-05-09 13:39:57 +04:00
decoder - > cleanup ( ) ;
oracles . push_back ( oracle ) ;
2011-05-12 15:05:48 +04:00
cerr < < " Rank " < < rank < < " , oracle length: " < < oracle . size ( ) < < " Bleu: " < < dummyBleuScores [ batchPosition ] [ oraclePos ] < < endl ;
2011-05-09 13:39:57 +04:00
2011-05-12 15:05:48 +04:00
oracleFeatureValues . push_back ( dummyFeatureValues [ batchPosition ] [ oraclePos ] ) ;
oracleBleuScores . push_back ( dummyBleuScores [ batchPosition ] [ oraclePos ] ) ;
// clear dummies
dummyFeatureValues [ batchPosition ] . clear ( ) ;
dummyBleuScores [ batchPosition ] . clear ( ) ;
2011-05-10 21:17:19 +04:00
// FEAR
cerr < < " Rank " < < rank < < " , run decoder to get 1best fear translations " < < endl ;
size_t fearPos = featureValues [ batchPosition ] . size ( ) ;
vector < const Word * > fear = decoder - > getNBest ( input , * sid , 1 , - 1.0 , bleuScoreWeight ,
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , true ,
distinctNbest , rank ) ;
decoder - > cleanup ( ) ;
2011-05-11 19:12:05 +04:00
cerr < < " Rank " < < rank < < " , fear length: " < < fear . size ( ) < < " Bleu: " < < bleuScores [ batchPosition ] [ fearPos ] < < endl ;
2011-05-10 21:17:19 +04:00
for ( size_t i = 0 ; i < fear . size ( ) ; + + i ) {
delete fear [ i ] ;
}
2011-05-09 13:39:57 +04:00
}
else {
// TODO:
}
}
else {
2011-05-12 18:21:03 +04:00
if ( ! hope_fear ) {
// MODEL
cerr < < " Rank " < < rank < < " , run decoder to get " < < pass_n < < " best wrt model score " < < endl ;
vector < const Word * > bestModel = decoder - > getNBest ( input , * sid , pass_n , 0.0 , bleuScoreWeight ,
2011-04-26 23:35:06 +04:00
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , true ,
2011-03-28 22:11:45 +04:00
distinctNbest , rank ) ;
2011-05-12 18:21:03 +04:00
decoder - > cleanup ( ) ;
oneBests . push_back ( bestModel ) ;
// needed for calculating bleu of dev (1best translations) // todo:
all_ref_ids . push_back ( * sid ) ;
allBestModelScore . push_back ( bestModel ) ;
cerr < < " Rank " < < rank < < " , model length: " < < bestModel . size ( ) < < " Bleu: " < < bleuScores [ batchPosition ] [ 0 ] < < endl ;
}
else if ( historyOf1best ) {
// MODEL (for updating the history only, using dummy vectors)
cerr < < " Rank " < < rank < < " , run decoder to get " < < 1 < < " best wrt model score " < < endl ;
vector < const Word * > bestModel = decoder - > getNBest ( input , * sid , 1 , 0.0 , bleuScoreWeight ,
dummyFeatureValues [ batchPosition ] , dummyBleuScores [ batchPosition ] , true ,
distinctNbest , rank ) ;
decoder - > cleanup ( ) ;
oneBests . push_back ( bestModel ) ;
cerr < < " Rank " < < rank < < " , model length: " < < bestModel . size ( ) < < " Bleu: " < < dummyBleuScores [ batchPosition ] [ 0 ] < < endl ;
}
2011-05-09 13:39:57 +04:00
// HOPE
cerr < < " Rank " < < rank < < " , run decoder to get " < < pass_n < < " best hope translations " < < endl ;
size_t oraclePos = featureValues [ batchPosition ] . size ( ) ;
vector < const Word * > oracle = decoder - > getNBest ( input , * sid , pass_n , 1.0 , bleuScoreWeight ,
2011-04-26 23:35:06 +04:00
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , true ,
2011-03-28 22:11:45 +04:00
distinctNbest , rank ) ;
2011-05-12 18:21:03 +04:00
// needed for history
inputLengths . push_back ( decoder - > getCurrentInputLength ( ) ) ;
ref_ids . push_back ( * sid ) ;
2011-05-09 13:39:57 +04:00
decoder - > cleanup ( ) ;
oracles . push_back ( oracle ) ;
cerr < < " Rank " < < rank < < " , oracle length: " < < oracle . size ( ) < < " Bleu: " < < bleuScores [ batchPosition ] [ oraclePos ] < < endl ;
oracleFeatureValues . push_back ( featureValues [ batchPosition ] [ oraclePos ] ) ;
2011-05-12 15:05:48 +04:00
oracleBleuScores . push_back ( bleuScores [ batchPosition ] [ oraclePos ] ) ;
2011-05-09 13:39:57 +04:00
// FEAR
cerr < < " Rank " < < rank < < " , run decoder to get " < < pass_n < < " best fear translations " < < endl ;
size_t fearPos = featureValues [ batchPosition ] . size ( ) ;
vector < const Word * > fear = decoder - > getNBest ( input , * sid , pass_n , - 1.0 , bleuScoreWeight ,
2011-03-28 22:11:45 +04:00
featureValues [ batchPosition ] , bleuScores [ batchPosition ] , true ,
distinctNbest , rank ) ;
2011-05-09 13:39:57 +04:00
decoder - > cleanup ( ) ;
cerr < < " Rank " < < rank < < " , fear length: " < < fear . size ( ) < < " Bleu: " < < bleuScores [ batchPosition ] [ fearPos ] < < endl ;
for ( size_t i = 0 ; i < fear . size ( ) ; + + i ) {
delete fear [ i ] ;
}
2011-03-22 20:17:43 +03:00
}
2011-02-24 13:54:16 +03:00
2011-05-09 13:39:57 +04:00
// cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl;
// summedApproxBleu += bleuScores[batchPosition][0];
2011-03-23 15:13:38 +03:00
// next input sentence
+ + sid ;
+ + actualBatchSize ;
+ + shardPosition ;
} // end of batch loop
// Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis)
vector < vector < float > > losses ( actualBatchSize ) ;
for ( size_t batchPosition = 0 ; batchPosition < actualBatchSize ; + + batchPosition ) {
for ( size_t j = 0 ; j < bleuScores [ batchPosition ] . size ( ) ; + + j ) {
losses [ batchPosition ] . push_back ( oracleBleuScores [ batchPosition ]
- bleuScores [ batchPosition ] [ j ] ) ;
}
}
// set weight for bleu feature to 0
const vector < const ScoreProducer * > featureFunctions =
2011-03-29 21:08:07 +04:00
StaticData : : Instance ( ) . GetTranslationSystem ( TranslationSystem : : DEFAULT ) . GetFeatureFunctions ( ) ;
2011-03-23 15:13:38 +03:00
mosesWeights . Assign ( featureFunctions . back ( ) , 0 ) ;
if ( logFeatureValues ) {
2011-03-02 20:41:13 +03:00
for ( size_t i = 0 ; i < featureValues . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < featureValues [ i ] . size ( ) ; + + j ) {
2011-03-23 15:13:38 +03:00
featureValues [ i ] [ j ] . ApplyLog ( baseOfLog ) ;
}
oracleFeatureValues [ i ] . ApplyLog ( baseOfLog ) ;
}
}
2011-04-08 14:59:41 +04:00
// get 1best model results with old weights
2011-04-13 15:22:10 +04:00
vector < vector < float > > bestModelOld_batch ;
2011-04-11 16:22:19 +04:00
for ( size_t i = 0 ; i < actualBatchSize ; + + i ) {
string & input = inputSentences [ * current_sid_start + i ] ;
2011-04-26 23:35:06 +04:00
vector < float > bestModelOld = decoder - > getBleuAndScore ( input , * current_sid_start + i , 0.0 , bleuScoreWeight , distinctNbest ) ;
2011-04-13 15:22:10 +04:00
bestModelOld_batch . push_back ( bestModelOld ) ;
2011-04-04 17:03:27 +04:00
decoder - > cleanup ( ) ;
2011-04-08 14:59:41 +04:00
}
2011-04-04 17:03:27 +04:00
2011-04-08 14:59:41 +04:00
// optionally print out the feature values
if ( print_feature_values ) {
2011-05-04 22:26:20 +04:00
cerr < < " \n Rank " < < rank < < " , epoch " < < epoch < < " , feature values: " < < endl ;
2011-04-08 14:59:41 +04:00
for ( size_t i = 0 ; i < featureValues . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < featureValues [ i ] . size ( ) ; + + j ) {
cerr < < featureValues [ i ] [ j ] < < endl ;
}
}
2011-04-04 17:03:27 +04:00
cerr < < endl ;
}
2011-05-10 21:17:19 +04:00
// Run optimiser on batch:
2011-05-04 22:26:20 +04:00
cerr < < " \n Rank " < < rank < < " , epoch " < < epoch < < " , run optimiser: " < < endl ;
2011-04-08 14:59:41 +04:00
ScoreComponentCollection oldWeights ( mosesWeights ) ;
2011-04-15 15:34:51 +04:00
vector < int > update_status ;
2011-05-10 21:17:19 +04:00
if ( perceptron_update ) {
// w += 0.01 (hope-fear)
cerr < < " hope: " < < oracleFeatureValues [ 0 ] < < endl ;
cerr < < " fear: " < < featureValues [ 0 ] [ 0 ] < < endl ;
ScoreComponentCollection featureValueDiff = oracleFeatureValues [ 0 ] ;
featureValueDiff . MinusEquals ( featureValues [ 0 ] [ 0 ] ) ;
cerr < < " hope - fear: " < < featureValueDiff < < endl ;
featureValueDiff . MultiplyEquals ( 0.01 ) ;
mosesWeights . PlusEquals ( featureValueDiff ) ;
update_status . push_back ( 0 ) ;
update_status . push_back ( 0 ) ;
update_status . push_back ( 0 ) ;
}
else if ( analytical_update ) {
2011-05-09 13:39:57 +04:00
update_status = optimiser - > updateWeightsAnalytically ( mosesWeights , featureValues [ 0 ] [ 0 ] ,
losses [ 0 ] [ 0 ] , oracleFeatureValues [ 0 ] , oracleBleuScores [ 0 ] , ref_ids [ 0 ] ,
learning_rate , max_sentence_update , rank , epoch , controlUpdates ) ;
}
else {
update_status = optimiser - > updateWeights ( mosesWeights , featureValues ,
2011-03-23 15:13:38 +03:00
losses , bleuScores , oracleFeatureValues , oracleBleuScores , ref_ids ,
2011-04-11 16:22:19 +04:00
learning_rate , max_sentence_update , rank , epoch , updates_per_epoch , controlUpdates ) ;
2011-05-09 13:39:57 +04:00
}
2011-04-08 16:45:01 +04:00
2011-04-11 02:07:41 +04:00
if ( update_status [ 0 ] = = 1 ) {
2011-05-03 17:58:59 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , no update for batch " < < endl ;
2011-04-08 14:59:41 +04:00
}
2011-04-11 02:07:41 +04:00
else if ( update_status [ 0 ] = = - 1 ) {
2011-05-03 17:58:59 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , update ignored " < < endl ;
2011-04-08 14:59:41 +04:00
}
2011-04-08 16:45:01 +04:00
else {
2011-04-11 02:07:41 +04:00
sumConstraintChangeAbs + = abs ( update_status [ 1 ] - update_status [ 2 ] ) ;
sumStillViolatedConstraints + = update_status [ 2 ] ;
2011-03-23 15:13:38 +03:00
2011-04-08 14:59:41 +04:00
if ( updates_per_epoch = = - 1 ) {
// pass new weights to decoder
if ( normaliseWeights ) {
mosesWeights . L1Normalise ( ) ;
2011-03-28 22:11:45 +04:00
}
2011-03-29 21:08:07 +04:00
2011-04-08 14:59:41 +04:00
cumulativeWeights . PlusEquals ( mosesWeights ) ;
2011-04-09 01:04:08 +04:00
+ + numberOfUpdates ;
+ + numberOfUpdatesThisEpoch ;
2011-04-08 17:11:35 +04:00
if ( averageWeights ) {
2011-04-08 14:59:41 +04:00
ScoreComponentCollection averageWeights ( cumulativeWeights ) ;
if ( accumulateWeights ) {
2011-04-09 01:04:08 +04:00
averageWeights . DivideEquals ( numberOfUpdates ) ;
2011-04-08 14:59:41 +04:00
} else {
2011-04-09 01:04:08 +04:00
averageWeights . DivideEquals ( numberOfUpdatesThisEpoch ) ;
2011-04-08 14:59:41 +04:00
}
mosesWeights = averageWeights ;
2011-05-04 22:26:20 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , set new average weights: " < < mosesWeights < < endl ;
2011-04-08 14:59:41 +04:00
}
else {
2011-05-04 22:26:20 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , set new weights: " < < mosesWeights < < endl ;
2011-04-08 14:59:41 +04:00
}
2011-03-29 21:08:07 +04:00
2011-04-08 14:59:41 +04:00
// set new Moses weights (averaged or not)
decoder - > setWeights ( mosesWeights ) ;
2011-03-23 15:13:38 +03:00
2011-04-08 14:59:41 +04:00
// compute difference to old weights
ScoreComponentCollection weightDifference ( mosesWeights ) ;
weightDifference . MinusEquals ( oldWeights ) ;
2011-05-04 22:26:20 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , weight difference: " < < weightDifference < < endl ;
2011-03-23 15:13:38 +03:00
2011-04-11 16:22:19 +04:00
// get 1best model results with new weights (for each sentence in batch)
2011-04-08 14:59:41 +04:00
vector < float > bestModelNew ;
2011-04-11 16:22:19 +04:00
for ( size_t i = 0 ; i < actualBatchSize ; + + i ) {
string & input = inputSentences [ * current_sid_start + i ] ;
2011-04-26 23:35:06 +04:00
bestModelNew = decoder - > getBleuAndScore ( input , * current_sid_start + i , 0.0 , bleuScoreWeight , distinctNbest ) ;
2011-04-04 17:03:27 +04:00
decoder - > cleanup ( ) ;
2011-05-09 13:39:57 +04:00
sumBleuChangeAbs + = abs ( bestModelOld_batch [ i ] [ 0 ] - bestModelNew [ 0 ] ) ;
2011-04-13 15:22:10 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , 1best model bleu, old: " < < bestModelOld_batch [ i ] [ 0 ] < < " , new: " < < bestModelNew [ 0 ] < < endl ;
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , 1best model score, old: " < < bestModelOld_batch [ i ] [ 1 ] < < " , new: " < < bestModelNew [ 1 ] < < endl ;
2011-03-02 20:41:13 +03:00
}
}
2011-03-23 15:13:38 +03:00
}
2011-05-09 13:39:57 +04:00
// update history (for approximate document Bleu)
2011-04-26 23:35:06 +04:00
if ( ! sentenceLevelBleu ) {
if ( historyOf1best ) {
for ( size_t i = 0 ; i < oneBests . size ( ) ; + + i ) {
2011-05-11 19:12:05 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , update history with 1best length: " < < oneBests [ i ] . size ( ) < < " " ;
2011-04-26 23:35:06 +04:00
}
decoder - > updateHistory ( oneBests , inputLengths , ref_ids , rank , epoch ) ;
}
else {
for ( size_t i = 0 ; i < oracles . size ( ) ; + + i ) {
2011-05-11 19:12:05 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , update history with oracle length: " < < oracles [ i ] . size ( ) < < " " ;
2011-04-26 23:35:06 +04:00
}
decoder - > updateHistory ( oracles , inputLengths , ref_ids , rank , epoch ) ;
}
2011-03-23 15:13:38 +03:00
}
2011-05-11 19:12:05 +04:00
// clean up oracle and 1best translations after updating history
2011-03-23 15:13:38 +03:00
for ( size_t i = 0 ; i < oracles . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < oracles [ i ] . size ( ) ; + + j ) {
delete oracles [ i ] [ j ] ;
}
}
2011-05-11 19:12:05 +04:00
for ( size_t i = 0 ; i < oneBests . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < oneBests [ i ] . size ( ) ; + + j ) {
delete oneBests [ i ] [ j ] ;
}
}
2011-03-23 15:13:38 +03:00
2011-03-28 22:11:45 +04:00
bool makeUpdate = updates_per_epoch = = - 1 ? 0 : ( shardPosition % ( shard . size ( ) / updates_per_epoch ) = = 0 ) ;
2011-03-23 20:25:33 +03:00
2011-03-23 21:42:18 +03:00
// apply accumulated updates
2011-03-23 20:25:33 +03:00
if ( makeUpdate & & typeid ( * optimiser ) = = typeid ( MiraOptimiser ) ) {
2011-03-29 21:08:07 +04:00
mosesWeights = decoder - > getWeights ( ) ;
2011-03-23 20:25:33 +03:00
ScoreComponentCollection accumulatedUpdates = ( ( MiraOptimiser * ) optimiser ) - > getAccumulatedUpdates ( ) ;
2011-05-04 22:26:20 +04:00
cerr < < " \n Rank " < < rank < < " , epoch " < < epoch < < " , updates to apply during epoch " < < epoch < < " : " < < accumulatedUpdates < < endl ;
2011-04-09 01:04:08 +04:00
if ( accumulatedUpdates . GetWeightedScore ( ) ! = 0 ) {
mosesWeights . PlusEquals ( accumulatedUpdates ) ;
( ( MiraOptimiser * ) optimiser ) - > resetAccumulatedUpdates ( ) ;
2011-03-23 20:25:33 +03:00
2011-04-09 01:04:08 +04:00
if ( normaliseWeights ) {
mosesWeights . L1Normalise ( ) ;
}
2011-03-23 20:25:33 +03:00
2011-04-09 01:04:08 +04:00
cumulativeWeights . PlusEquals ( mosesWeights ) ;
+ + numberOfUpdates ;
+ + numberOfUpdatesThisEpoch ;
2011-03-29 21:08:07 +04:00
2011-04-09 01:04:08 +04:00
if ( averageWeights ) {
ScoreComponentCollection averageWeights ( cumulativeWeights ) ;
if ( accumulateWeights ) {
averageWeights . DivideEquals ( numberOfUpdates ) ;
} else {
averageWeights . DivideEquals ( numberOfUpdatesThisEpoch ) ;
}
mosesWeights = averageWeights ;
2011-05-04 22:26:20 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , set new average weights after applying cumulative update: " < < mosesWeights < < endl ;
2011-04-09 01:04:08 +04:00
}
else {
2011-05-04 22:26:20 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , set new weights after applying cumulative update: " < < mosesWeights < < endl ;
2011-03-29 21:08:07 +04:00
}
2011-04-09 01:04:08 +04:00
decoder - > setWeights ( mosesWeights ) ;
// compute difference to old weights
ScoreComponentCollection weightDifference ( mosesWeights ) ;
weightDifference . MinusEquals ( oldWeights ) ;
2011-05-04 22:26:20 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , weight difference: " < < weightDifference < < endl ;
2011-05-05 15:35:01 +04:00
// get 1best model results with new weights (for each sentence in batch)
vector < float > bestModelNew ;
for ( size_t i = 0 ; i < actualBatchSize ; + + i ) {
string & input = inputSentences [ * current_sid_start + i ] ;
bestModelNew = decoder - > getBleuAndScore ( input , * current_sid_start + i , 0.0 , bleuScoreWeight , distinctNbest ) ;
decoder - > cleanup ( ) ;
2011-05-09 13:39:57 +04:00
sumBleuChangeAbs + = abs ( bestModelOld_batch [ i ] [ 0 ] - bestModelNew [ 0 ] ) ;
2011-05-05 15:35:01 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , 1best model bleu, old: " < < bestModelOld_batch [ i ] [ 0 ] < < " , new: " < < bestModelNew [ 0 ] < < endl ;
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , 1best model score, old: " < < bestModelOld_batch [ i ] [ 1 ] < < " , new: " < < bestModelNew [ 1 ] < < endl ;
}
2011-04-08 14:59:41 +04:00
}
else {
2011-05-04 22:26:20 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , cumulative update is empty.. " < < endl ;
2011-03-29 21:08:07 +04:00
}
2011-03-23 20:25:33 +03:00
}
2011-04-13 17:11:57 +04:00
size_t mixing_base = mixingFrequency = = 0 ? 0 : shard . size ( ) / mixingFrequency ;
size_t dumping_base = weightDumpFrequency = = 0 ? 0 : shard . size ( ) / weightDumpFrequency ;
2011-03-28 22:11:45 +04:00
// mix weights?
2011-04-10 23:48:57 +04:00
if ( evaluateModulo ( shardPosition , mixing_base , actualBatchSize ) ) {
2011-03-07 17:12:36 +03:00
# ifdef MPI_ENABLE
2011-03-29 21:08:07 +04:00
ScoreComponentCollection mixedWeights ;
2011-03-23 20:25:33 +03:00
cerr < < " \n Rank " < < rank < < " , before mixing: " < < mosesWeights < < endl ;
2011-03-28 22:11:45 +04:00
// collect all weights in mixedWeights and divide by number of processes
mpi : : reduce ( world , mosesWeights , mixedWeights , SCCPlus ( ) , 0 ) ;
2011-03-23 20:25:33 +03:00
if ( rank = = 0 ) {
// divide by number of processes
2011-03-28 22:11:45 +04:00
mixedWeights . DivideEquals ( size ) ;
2011-03-23 20:25:33 +03:00
// normalise weights after averaging
if ( normaliseWeights ) {
2011-03-28 22:11:45 +04:00
mixedWeights . L1Normalise ( ) ;
cerr < < " Mixed weights (normalised): " < < mixedWeights < < endl ;
2011-03-23 15:13:38 +03:00
}
2011-03-23 20:25:33 +03:00
else {
2011-03-28 22:11:45 +04:00
cerr < < " Mixed weights: " < < mixedWeights < < endl ;
2011-03-23 20:25:33 +03:00
}
}
2011-03-23 15:13:38 +03:00
2011-03-23 20:25:33 +03:00
// broadcast average weights from process 0
2011-03-28 22:11:45 +04:00
mpi : : broadcast ( world , mixedWeights , 0 ) ;
decoder - > setWeights ( mixedWeights ) ;
2011-03-29 21:08:07 +04:00
mosesWeights = mixedWeights ;
2010-11-24 20:06:54 +03:00
# endif
# ifndef MPI_ENABLE
2011-03-29 21:08:07 +04:00
cerr < < " \n Rank " < < rank < < " , no mixing, weights: " < < mosesWeights < < endl ;
2010-12-06 18:28:51 +03:00
# endif
2011-03-23 20:25:33 +03:00
} // end mixing
2011-03-23 15:13:38 +03:00
2011-03-28 22:11:45 +04:00
// Dump weights?
2011-04-10 23:48:57 +04:00
if ( evaluateModulo ( shardPosition , dumping_base , actualBatchSize ) ) {
2011-03-28 22:11:45 +04:00
ScoreComponentCollection tmpAverageWeights ( cumulativeWeights ) ;
2011-03-23 20:25:33 +03:00
if ( accumulateWeights ) {
2011-04-09 01:04:08 +04:00
tmpAverageWeights . DivideEquals ( numberOfUpdates ) ;
2011-03-23 20:25:33 +03:00
} else {
2011-04-09 01:04:08 +04:00
tmpAverageWeights . DivideEquals ( numberOfUpdatesThisEpoch ) ;
2011-03-23 20:25:33 +03:00
}
2011-03-08 19:58:02 +03:00
# ifdef MPI_ENABLE
2011-03-23 20:25:33 +03:00
// average across processes
2011-03-28 22:11:45 +04:00
mpi : : reduce ( world , tmpAverageWeights , mixedAverageWeights , SCCPlus ( ) , 0 ) ;
2011-03-08 19:58:02 +03:00
# endif
# ifndef MPI_ENABLE
2011-03-28 22:11:45 +04:00
mixedAverageWeights = tmpAverageWeights ;
2011-03-08 19:58:02 +03:00
# endif
2011-03-23 20:25:33 +03:00
if ( rank = = 0 & & ! weightDumpStem . empty ( ) ) {
// divide by number of processes
2011-03-28 22:11:45 +04:00
mixedAverageWeights . DivideEquals ( size ) ;
2011-03-23 20:25:33 +03:00
// normalise weights after averaging
if ( normaliseWeights ) {
2011-03-28 22:11:45 +04:00
mixedAverageWeights . L1Normalise ( ) ;
2011-03-23 20:25:33 +03:00
}
// dump final average weights
ostringstream filename ;
if ( epoch < 10 ) {
2011-03-23 15:13:38 +03:00
filename < < weightDumpStem < < " _0 " < < epoch ;
2011-03-23 20:25:33 +03:00
} else {
filename < < weightDumpStem < < " _ " < < epoch ;
}
2011-03-23 15:13:38 +03:00
2011-04-11 16:23:36 +04:00
if ( weightDumpFrequency > 1 ) {
2011-03-23 20:25:33 +03:00
filename < < " _ " < < weightEpochDump ;
}
2011-03-23 15:13:38 +03:00
2011-03-23 20:25:33 +03:00
if ( accumulateWeights ) {
2011-03-28 22:11:45 +04:00
cerr < < " \n Mixed average weights (cumulative) during epoch " < < epoch < < " : " < < mixedAverageWeights < < endl ;
2011-03-23 20:25:33 +03:00
} else {
2011-03-28 22:11:45 +04:00
cerr < < " \n Mixed average weights during epoch " < < epoch < < " : " < < mixedAverageWeights < < endl ;
2011-03-23 20:25:33 +03:00
}
2011-03-28 22:11:45 +04:00
cerr < < " Dumping mixed average weights during epoch " < < epoch < < " to " < < filename . str ( ) < < endl ;
mixedAverageWeights . Save ( filename . str ( ) ) ;
2011-03-23 20:25:33 +03:00
+ + weightEpochDump ;
}
2011-04-10 23:48:57 +04:00
} // end dumping
2011-03-23 20:25:33 +03:00
} // end of shard loop, end of this epoch
2011-03-23 15:13:38 +03:00
2011-04-26 23:35:06 +04:00
cerr < < " Bleu feature history after epoch " < < epoch < < endl ;
decoder - > printBleuFeatureHistory ( cerr ) ;
2011-05-09 13:39:57 +04:00
// Check whether there were any weight updates during this epoch
2011-04-09 01:04:08 +04:00
size_t sumUpdates ;
2011-04-10 23:05:36 +04:00
size_t * sendbuf_uint , * recvbuf_uint ;
sendbuf_uint = ( size_t * ) malloc ( sizeof ( size_t ) ) ;
recvbuf_uint = ( size_t * ) malloc ( sizeof ( size_t ) ) ;
2011-04-09 01:04:08 +04:00
# ifdef MPI_ENABLE
2011-04-10 20:50:28 +04:00
//mpi::reduce(world, numberOfUpdatesThisEpoch, sumUpdates, MPI_SUM, 0);
2011-04-10 23:05:36 +04:00
sendbuf_uint [ 0 ] = numberOfUpdatesThisEpoch ;
recvbuf_uint [ 0 ] = 0 ;
MPI_Reduce ( sendbuf_uint , recvbuf_uint , 1 , MPI_UNSIGNED , MPI_SUM , 0 , world ) ;
sumUpdates = recvbuf_uint [ 0 ] ;
2011-04-09 01:04:08 +04:00
# endif
# ifndef MPI_ENABLE
sumUpdates = numberOfUpdatesThisEpoch ;
# endif
2011-04-10 23:05:36 +04:00
if ( rank = = 0 & & sumUpdates = = 0 ) {
cerr < < " \n No weight updates during this epoch.. stopping. " < < endl ;
stop = true ;
# ifdef MPI_ENABLE
mpi : : broadcast ( world , stop , 0 ) ;
# endif
2011-04-08 14:59:41 +04:00
}
2011-04-09 01:04:08 +04:00
2011-05-09 13:39:57 +04:00
if ( epoch > 0 ) {
if ( ( sumConstraintChangeAbs_lastEpoch = = sumConstraintChangeAbs ) & & ( sumStillViolatedConstraints_lastEpoch = = sumStillViolatedConstraints ) ) {
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , sum of violated constraints and constraint changes has stayed the same: " < < sumStillViolatedConstraints < < " , " < < sumConstraintChangeAbs < < endl ;
2011-04-11 02:07:41 +04:00
}
2011-04-11 16:22:19 +04:00
else {
2011-05-09 13:39:57 +04:00
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , sum of violated constraints: " < < sumStillViolatedConstraints < < " , sum of constraint changes " < < sumConstraintChangeAbs < < endl ;
2011-04-11 16:22:19 +04:00
}
2011-05-09 13:39:57 +04:00
}
else {
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , sum of violated constraints: " < < sumStillViolatedConstraints < < endl ;
}
2011-04-11 02:07:41 +04:00
2011-05-09 13:39:57 +04:00
sumConstraintChangeAbs_lastEpoch = sumConstraintChangeAbs ;
sumStillViolatedConstraints_lastEpoch = sumStillViolatedConstraints ;
if ( min_bleu_change > 0 ) {
if ( sumBleuChangeAbs < min_bleu_change ) {
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , sum of BLEU score changes was smaller than " < < min_bleu_change < < " ( " < < sumBleuChangeAbs < < " ). " < < endl ;
stop = true ;
}
else {
cerr < < " Rank " < < rank < < " , epoch " < < epoch < < " , sum of BLEU score changes: " < < sumBleuChangeAbs < < " . " < < endl ;
}
2011-04-10 23:05:36 +04:00
}
if ( ! stop ) {
2011-05-09 13:39:57 +04:00
/* if (devBleu) {
2011-04-08 14:59:41 +04:00
// calculate bleu score of dev set
vector < float > bleuAndRatio = decoder - > calculateBleuOfCorpus ( allBestModelScore , all_ref_ids , epoch , rank ) ;
float bleu = bleuAndRatio [ 0 ] ;
float ratio = bleuAndRatio [ 1 ] ;
for ( size_t i = 0 ; i < allBestModelScore . size ( ) ; + + i ) {
for ( size_t j = 0 ; j < allBestModelScore [ i ] . size ( ) ; + + j ) {
delete allBestModelScore [ i ] [ j ] ;
}
2011-03-23 15:13:38 +03:00
}
2011-04-08 14:59:41 +04:00
if ( rank = = 0 ) {
beforePrevAverageBleu = prevAverageBleu ;
beforePrevAverageApproxBleu = prevAverageApproxBleu ;
prevAverageBleu = averageBleu ;
prevAverageApproxBleu = averageApproxBleu ;
}
2011-03-04 15:24:10 +03:00
# ifdef MPI_ENABLE
2011-04-08 14:59:41 +04:00
// average bleu across processes
sendbuf [ 0 ] = bleu ;
recvbuf [ 0 ] = 0 ;
MPI_Reduce ( sendbuf , recvbuf , 1 , MPI_FLOAT , MPI_SUM , 0 , world ) ;
if ( rank = = 0 ) {
averageBleu = recvbuf [ 0 ] ;
2011-03-23 15:13:38 +03:00
2011-04-08 14:59:41 +04:00
// divide by number of processes
averageBleu / = size ;
cerr < < " Average Bleu (dev) after epoch " < < epoch < < " : " < < averageBleu < < endl ;
2011-03-23 15:13:38 +03:00
}
2011-04-08 14:59:41 +04:00
// average ratio across processes
sendbuf [ 0 ] = ratio ;
recvbuf [ 0 ] = 0 ;
MPI_Reduce ( sendbuf , recvbuf , 1 , MPI_FLOAT , MPI_SUM , 0 , world ) ;
if ( rank = = 0 ) {
averageRatio = recvbuf [ 0 ] ;
// divide by number of processes
averageRatio / = size ;
cerr < < " Average ratio (dev) after epoch " < < epoch < < " : " < < averageRatio < < endl ;
if ( averageRatio > 1.008 & & adapt_BPfactor ) {
BPfactor - = 0.05 ;
decoder - > setBPfactor ( BPfactor ) ;
cerr < < " Change BPfactor to " < < BPfactor < < " .. " < < endl ;
}
else if ( averageRatio > 1.0 & & adapt_BPfactor ) {
BPfactor = 1 ;
decoder - > setBPfactor ( BPfactor ) ;
cerr < < " Change BPfactor to 1.. " < < endl ;
}
2011-03-23 15:13:38 +03:00
}
2011-04-08 14:59:41 +04:00
// average approximate sentence bleu across processes
2011-04-09 01:04:08 +04:00
sendbuf [ 0 ] = summedApproxBleu / numberOfUpdatesThisEpoch ;
2011-04-08 14:59:41 +04:00
recvbuf [ 0 ] = 0 ;
MPI_Reduce ( sendbuf , recvbuf , 1 , MPI_FLOAT , MPI_SUM , 0 , world ) ;
if ( rank = = 0 ) {
averageApproxBleu = recvbuf [ 0 ] ;
2011-03-23 15:13:38 +03:00
2011-04-08 14:59:41 +04:00
// divide by number of processes
averageApproxBleu / = size ;
cerr < < " Average approx. sentence Bleu (dev) after epoch " < < epoch < < " : " < < averageApproxBleu < < endl ;
}
2011-03-04 15:24:10 +03:00
# endif
# ifndef MPI_ENABLE
2011-04-08 14:59:41 +04:00
averageBleu = bleu ;
cerr < < " Average Bleu (dev) after epoch " < < epoch < < " : " < < averageBleu < < endl ;
2011-04-09 01:04:08 +04:00
averageApproxBleu = summedApproxBleu / numberOfUpdatesThisEpoch ;
2011-04-08 14:59:41 +04:00
cerr < < " Average approx. sentence Bleu (dev) after epoch " < < epoch < < " : " < < averageApproxBleu < < endl ;
2011-03-04 15:24:10 +03:00
# endif
2011-04-08 14:59:41 +04:00
if ( rank = = 0 ) {
2011-03-23 15:13:38 +03:00
if ( stop_dev_bleu ) {
2011-04-08 14:59:41 +04:00
if ( averageBleu < = prevAverageBleu & & prevAverageBleu < = beforePrevAverageBleu ) {
stop = true ;
cerr < < " Average Bleu (dev) is decreasing or no more increasing.. stop tuning. " < < endl ;
ScoreComponentCollection dummy ;
ostringstream endfilename ;
endfilename < < " stopping " ;
dummy . Save ( endfilename . str ( ) ) ;
}
}
2011-03-03 13:46:53 +03:00
2011-03-23 15:13:38 +03:00
if ( stop_approx_dev_bleu ) {
2011-04-08 14:59:41 +04:00
if ( averageApproxBleu < = prevAverageApproxBleu & & prevAverageApproxBleu < = beforePrevAverageApproxBleu ) {
stop = true ;
cerr < < " Average approx. sentence Bleu (dev) is decreasing or no more increasing.. stop tuning. " < < endl ;
ScoreComponentCollection dummy ;
ostringstream endfilename ;
endfilename < < " stopping " ;
dummy . Save ( endfilename . str ( ) ) ;
}
}
2011-03-23 15:13:38 +03:00
}
2011-02-24 13:54:16 +03:00
2011-03-07 17:12:36 +03:00
# ifdef MPI_ENABLE
2011-04-08 14:59:41 +04:00
mpi : : broadcast ( world , stop , 0 ) ;
2011-03-07 17:12:36 +03:00
# endif
2011-05-09 13:39:57 +04:00
} // end if (dev_bleu) */
2011-04-08 14:59:41 +04:00
// Test if weights have converged
if ( weightConvergence ) {
bool reached = true ;
if ( rank = = 0 & & ( epoch > = 2 ) ) {
ScoreComponentCollection firstDiff ( mixedAverageWeights ) ;
firstDiff . MinusEquals ( mixedAverageWeightsPrevious ) ;
cerr < < " Average weight changes since previous epoch: " < < firstDiff < < endl ;
ScoreComponentCollection secondDiff ( mixedAverageWeights ) ;
secondDiff . MinusEquals ( mixedAverageWeightsBeforePrevious ) ;
cerr < < " Average weight changes since before previous epoch: " < < secondDiff < < endl < < endl ;
// check whether stopping criterion has been reached
// (both difference vectors must have all weight changes smaller than min_weight_change)
FVector changes1 = firstDiff . GetScoresVector ( ) ;
FVector changes2 = secondDiff . GetScoresVector ( ) ;
FVector : : const_iterator iterator1 = changes1 . cbegin ( ) ;
FVector : : const_iterator iterator2 = changes2 . cbegin ( ) ;
while ( iterator1 ! = changes1 . cend ( ) ) {
if ( abs ( ( * iterator1 ) . second ) > = min_weight_change | | abs (
( * iterator2 ) . second ) > = min_weight_change ) {
reached = false ;
break ;
}
+ + iterator1 ;
+ + iterator2 ;
2011-03-23 15:13:38 +03:00
}
2011-04-08 14:59:41 +04:00
if ( reached ) {
// stop MIRA
stop = true ;
cerr < < " Stopping criterion has been reached after epoch " < < epoch < < " .. stopping MIRA. " < < endl ;
ScoreComponentCollection dummy ;
ostringstream endfilename ;
endfilename < < " stopping " ;
dummy . Save ( endfilename . str ( ) ) ;
}
2011-03-23 15:13:38 +03:00
}
2011-03-28 22:11:45 +04:00
2011-04-08 14:59:41 +04:00
mixedAverageWeightsBeforePrevious = mixedAverageWeightsPrevious ;
mixedAverageWeightsPrevious = mixedAverageWeights ;
2011-03-04 15:24:10 +03:00
# ifdef MPI_ENABLE
2011-04-08 14:59:41 +04:00
mpi : : broadcast ( world , stop , 0 ) ;
2011-03-04 15:24:10 +03:00
# endif
2011-04-08 14:59:41 +04:00
} //end if (weightConvergence)
// if using flexible margin scale factor, increase scaling (decrease value) for next epoch
if ( marginScaleFactorStep > 0 ) {
if ( marginScaleFactor - marginScaleFactorStep > = marginScaleFactorMin ) {
if ( typeid ( * optimiser ) = = typeid ( MiraOptimiser ) ) {
marginScaleFactor - = marginScaleFactorStep ;
cerr < < " Change margin scale factor to: " < < marginScaleFactor < < endl ;
( ( MiraOptimiser * ) optimiser ) - > setMarginScaleFactor ( marginScaleFactor ) ;
}
2011-03-23 15:13:38 +03:00
}
}
2011-05-09 13:39:57 +04:00
// if using flexible regularization, decrease regularization parameter for next epoch
2011-04-08 14:59:41 +04:00
if ( slack_step > 0 ) {
2011-05-09 13:39:57 +04:00
if ( slack - slack_step > = slack_min ) {
2011-04-08 14:59:41 +04:00
if ( typeid ( * optimiser ) = = typeid ( MiraOptimiser ) ) {
2011-05-09 13:39:57 +04:00
slack - = slack_step ;
2011-04-08 14:59:41 +04:00
cerr < < " Change slack to: " < < slack < < endl ;
( ( MiraOptimiser * ) optimiser ) - > setSlack ( slack ) ;
}
2011-03-23 15:13:38 +03:00
}
}
2011-04-08 14:59:41 +04:00
// change learning rate
if ( ( decrease_learning_rate > 0 ) & & ( learning_rate - decrease_learning_rate > = min_learning_rate ) ) {
learning_rate - = decrease_learning_rate ;
if ( learning_rate < = 0.0001 ) {
learning_rate = 0 ;
stop = true ;
2011-03-18 18:49:48 +03:00
# ifdef MPI_ENABLE
2011-04-08 14:59:41 +04:00
mpi : : broadcast ( world , stop , 0 ) ;
2011-03-18 18:49:48 +03:00
# endif
2011-04-08 14:59:41 +04:00
}
cerr < < " Change learning rate to " < < learning_rate < < endl ;
2011-03-23 15:13:38 +03:00
}
2011-04-08 14:59:41 +04:00
// change maximum sentence update
if ( ( decrease_sentence_update > 0 ) & & ( max_sentence_update - decrease_sentence_update > = min_sentence_update ) ) {
max_sentence_update - = decrease_sentence_update ;
if ( max_sentence_update < = 0.0001 ) {
max_sentence_update = 0 ;
stop = true ;
2011-03-18 18:49:48 +03:00
# ifdef MPI_ENABLE
2011-04-08 14:59:41 +04:00
mpi : : broadcast ( world , stop , 0 ) ;
2011-03-18 18:49:48 +03:00
# endif
2011-04-08 14:59:41 +04:00
}
cerr < < " Change maximum sentence update to " < < max_sentence_update < < endl ;
2011-03-23 15:13:38 +03:00
}
}
} // end of epoch loop
2011-03-07 17:12:36 +03:00
2010-12-01 21:09:49 +03:00
# ifdef MPI_ENABLE
2011-03-23 15:13:38 +03:00
MPI_Finalize ( ) ;
2010-12-06 18:28:51 +03:00
# endif
2011-03-23 15:13:38 +03:00
now = time ( 0 ) ; // get current time
tm = localtime ( & now ) ; // get struct filled out
cerr < < " \n End date/time: " < < tm - > tm_mon + 1 < < " / " < < tm - > tm_mday
< < " / " < < tm - > tm_year + 1900 < < " , " < < tm - > tm_hour < < " : "
< < tm - > tm_min < < " : " < < tm - > tm_sec < < endl ;
2010-09-15 18:36:07 +04:00
2011-03-23 15:13:38 +03:00
delete decoder ;
exit ( 0 ) ;
2010-09-15 18:36:07 +04:00
}
2010-09-16 20:23:52 +04:00