2010-09-15 18:36:07 +04:00
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
2010-09-28 19:13:50 +04:00
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <typeinfo>
#include <vector>

#include <boost/program_options.hpp>

#ifdef MPI_ENABLE
#include <boost/mpi.hpp>
namespace mpi = boost::mpi;
#endif

#include "FeatureVector.h"
#include "StaticData.h"
#include "ChartTrellisPathList.h"
#include "ChartTrellisPath.h"
#include "ScoreComponentCollection.h"
#include "Decoder.h"
#include "Optimiser.h"

using namespace Mira;
using namespace std;
using namespace Moses;
namespace po = boost::program_options;
2010-09-16 20:23:52 +04:00
void OutputNBestList ( const MosesChart : : TrellisPathList & nBestList , const TranslationSystem * system , long translationId ) ;
2010-09-15 19:38:46 +04:00
// Read every line of `filename` into `sentences` (one entry per line,
// newline stripped by getline). Returns false if the file cannot be opened,
// true otherwise; an empty file yields an empty vector and true.
bool loadSentences(const string& filename, vector<string>& sentences) {
  ifstream in(filename.c_str());
  if (!in) {
    return false;
  }
  for (string line; getline(in, line); ) {
    sentences.push_back(line);
  }
  return true;
}
2010-09-28 19:13:50 +04:00
// Functor handed to std::random_shuffle: yields a pseudo-random index
// in the half-open range [0, max). Uses rand(), so reproducibility depends
// on the process-wide PRNG state.
struct RandomIndex {
  ptrdiff_t operator()(ptrdiff_t max) {
    const int raw = rand();
    return static_cast<ptrdiff_t>(raw) % max;
  }
};
2010-09-15 18:36:07 +04:00
// Entry point for MIRA/perceptron training: parses options, loads input and
// reference sentences, shards them across MPI processes, and for each epoch
// decodes model/hope/fear nbest lists per sentence batch, runs the optimiser
// on the batch, and periodically mixes (MPI) and dumps averaged weights.
// Returns 0 on success (via exit), 1 on bad/missing arguments or load failure.
int main(int argc, char** argv) {
  size_t rank = 0; size_t size = 1;
#ifdef MPI_ENABLE
  mpi::environment env(argc, argv);
  mpi::communicator world;
  rank = world.rank();
  size = world.size();
#endif
  cerr << "Rank: " << rank << " Size: " << size << endl;

  // Command-line option targets; populated by boost::program_options below.
  bool help;
  int verbosity;
  string mosesConfigFile;
  string inputFile;
  vector<string> referenceFiles;
  size_t epochs;
  string learner;
  bool shuffle;
  bool hildreth;
  size_t mixFrequency;
  size_t weightDumpFrequency;
  string weightDumpStem;
  float marginScaleFactor;
  size_t n;                        // nbest list size per decoder call
  size_t batchSize;
  bool distinctNbest;
  bool onlyViolatedConstraints;
  bool accumulateWeights;
  bool useScaledReference;
  bool scaleByInputLength;
  bool increaseBP;
  bool regulariseHildrethUpdates;
  float clipping;
  bool fixedClipping;

  po::options_description desc("Allowed options");
  desc.add_options()
    ("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
    ("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
    ("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
    ("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
    ("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
    ("epochs,e", po::value<size_t>(&epochs)->default_value(1), "Number of epochs")
    ("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
    ("mix-frequency", po::value<size_t>(&mixFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
    ("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights")
    ("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights")
    ("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
    ("hildreth", po::value<bool>(&hildreth)->default_value(true), "Use Hildreth's optimisation algorithm")
    ("margin-scale-factor,m", po::value<float>(&marginScaleFactor)->default_value(1.0), "Margin scale factor, regularises the update by scaling the enforced margin")
    ("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
    ("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments")
    ("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(false), "Use nbest list with distinct translations in inference step")
    ("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
    ("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
    ("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
    ("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
    ("increase-BP", po::value<bool>(&increaseBP)->default_value(false), "Increase penalty for short translations")
    ("regularise-hildreth-updates", po::value<bool>(&regulariseHildrethUpdates)->default_value(false), "Regularise Hildreth updates with the value set for clipping")
    ("clipping", po::value<float>(&clipping)->default_value(0.01f), "Set a threshold to regularise updates")
    ("fixed-clipping", po::value<bool>(&fixedClipping)->default_value(false), "Use a fixed clipping threshold with SMO (instead of adaptive)");

  po::options_description cmdline_options;
  cmdline_options.add(desc);
  po::variables_map vm;
  po::store(po::command_line_parser(argc, argv).
            options(cmdline_options).run(), vm);
  po::notify(vm);

  if (help) {
    std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file -i input-file -r reference-file(s) [options]" << std::endl;
    std::cout << desc << std::endl;
    return 0;
  }

  // Required arguments: config, input and at least one reference file.
  if (mosesConfigFile.empty()) {
    cerr << "Error: No moses ini file specified" << endl;
    return 1;
  }

  if (inputFile.empty()) {
    cerr << "Error: No input file specified" << endl;
    return 1;
  }

  if (!referenceFiles.size()) {
    cerr << "Error: No reference files specified" << endl;
    return 1;
  }

  // load input and references; every reference file must have exactly one
  // line per input sentence.
  vector<string> inputSentences;
  if (!loadSentences(inputFile, inputSentences)) {
    cerr << "Error: Failed to load input sentences from " << inputFile << endl;
    return 1;
  }

  vector<vector<string> > referenceSentences(referenceFiles.size());
  for (size_t i = 0; i < referenceFiles.size(); ++i) {
    if (!loadSentences(referenceFiles[i], referenceSentences[i])) {
      cerr << "Error: Failed to load reference sentences from " << referenceFiles[i] << endl;
      return 1;
    }
    if (referenceSentences[i].size() != inputSentences.size()) {
      cerr << "Error: Input file length (" << inputSentences.size() <<
        ") != (" << referenceSentences[i].size() << ") length of reference file " << i <<
        endl;
      return 1;
    }
  }

  // initialise Moses
  initMoses(mosesConfigFile, verbosity); //, argc, argv);
  MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, increaseBP);
  // Normalise the starting weights so later L1-normalised updates are comparable.
  ScoreComponentCollection startWeights = decoder->getWeights();
  startWeights.L1Normalise();
  decoder->setWeights(startWeights);

  // Optionally shuffle the sentences. Only rank 0 builds (and shuffles) the
  // order; it is broadcast below so all processes agree on it.
  vector<size_t> order;
  if (rank == 0) {
    for (size_t i = 0; i < inputSentences.size(); ++i) {
      order.push_back(i);
    }

    if (shuffle) {
      cerr << "Shuffling input sentences.." << endl;
      RandomIndex rindex;
      random_shuffle(order.begin(), order.end(), rindex);
    }
  }

#ifdef MPI_ENABLE
  mpi::broadcast(world, order, 0);
#endif

  // Create the shards according to the number of processes used; the last
  // rank absorbs any remainder so every sentence is covered exactly once.
  vector<size_t> shard;
  float shardSize = (float)(order.size()) / size;
  VERBOSE(1, "Shard size: " << shardSize << endl);
  size_t shardStart = (size_t)(shardSize * rank);
  size_t shardEnd = (size_t)(shardSize * (rank + 1));
  if (rank == size - 1) shardEnd = order.size();
  VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl);
  // NOTE(review): resize takes the float shardSize (truncated); for the last
  // rank shardEnd - shardStart can exceed that, which would overflow the copy.
  // Presumably shardEnd - shardStart was intended — verify.
  shard.resize(shardSize);
  copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());

  // Select and configure the optimiser.
  Optimiser* optimiser = NULL;
  cerr << "Nbest list size: " << n << endl;
  cerr << "Distinct translations in nbest list? " << distinctNbest << endl;
  if (learner == "mira") {
    cerr << "Optimising using Mira" << endl;
    optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor, onlyViolatedConstraints, clipping, fixedClipping, regulariseHildrethUpdates);
    if (hildreth) {
      cerr << "Using Hildreth's optimisation algorithm.." << endl;
    }
    else {
      cerr << "Using some sort of SMO.." << endl;
    }
    cerr << "Margin scale factor: " << marginScaleFactor << endl;
    cerr << "Add only violated constraints? " << onlyViolatedConstraints << endl;
  } else if (learner == "perceptron") {
    cerr << "Optimising using Perceptron" << endl;
    optimiser = new Perceptron();
  } else {
    // NOTE(review): optimiser stays NULL here but execution continues into the
    // main loop, which dereferences it — verify unknown learners should abort.
    cerr << "Error: Unknown optimiser: " << learner << endl;
  }

  //Main loop:
  ScoreComponentCollection cumulativeWeights; // collect weights per epoch to produce an average
  size_t iterations = 0;          // total optimiser updates across all epochs
  size_t iterationsThisEpoch = 0; // optimiser updates within the current epoch

  time_t now = time(0); // get current time
  struct tm* tm = localtime(&now); // get struct filled out
  cerr << "Start date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
       << ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec << endl;

  // the result of accumulating and averaging weights over one epoch and possibly several processes
  ScoreComponentCollection averageTotalWeights;

  // TODO: scaling of feature values for probabilistic features
  for (size_t epoch = 0; epoch < epochs; ++epoch) {
    cerr << "\nEpoch " << epoch << endl;
    // Sum up weights over one epoch, final average uses weights from last epoch
    iterationsThisEpoch = 0;
    if (!accumulateWeights) {
      cumulativeWeights.ZeroAll();
    }

    // number of weight dumps this epoch
    size_t weightEpochDump = 0;

    size_t shardPosition = 0;
    vector<size_t>::const_iterator sid = shard.begin();
    while (sid != shard.end()) {
      // feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues)
      vector<vector<ScoreComponentCollection> > featureValues;
      vector<vector<float> > bleuScores;

      // BATCHING: produce nbest lists for all input sentences in batch
      vector<size_t> oraclePositions;       // index of each oracle within its nbest block
      vector<float> oracleBleuScores;
      vector<vector<const Word*> > oracles; // oracle translations, freed after history update
      vector<ScoreComponentCollection> oracleFeatureValues;
      vector<size_t> inputLengths;          // source lengths, for document-BLEU history
      vector<size_t> ref_ids;               // sentence ids, for document-BLEU history
      size_t actualBatchSize = 0;           // may be < batchSize at end of shard
      for (size_t batchPosition = 0; batchPosition < batchSize && sid != shard.end(); ++batchPosition) {
        const string& input = inputSentences[*sid];
        // NOTE(review): refs is unused in this loop body — the decoder reads
        // references from referenceSentences passed at construction.
        const vector<string>& refs = referenceSentences[*sid];
        cerr << "\nBatch position " << batchPosition << endl;
        cerr << "Input sentence " << *sid << ": \"" << input << "\"" << endl;

        // One nbest block per batch position; model, hope and fear lists are
        // all appended into the same featureValues/bleuScores vectors.
        vector<ScoreComponentCollection> newFeatureValues;
        vector<float> newBleuScores;
        featureValues.push_back(newFeatureValues);
        bleuScores.push_back(newBleuScores);

        // MODEL: nbest by pure model score (BLEU weight 0.0).
        cerr << "Run decoder to get nbest wrt model score" << endl;
        vector<const Word*> bestModel = decoder->getNBest(input,
                        *sid,
                        n,
                        0.0,
                        1.0,
                        featureValues[batchPosition],
                        bleuScores[batchPosition],
                        true,
                        distinctNbest);
        inputLengths.push_back(decoder->getCurrentInputLength());
        ref_ids.push_back(*sid);
        decoder->cleanup();
        for (size_t i = 0; i < bestModel.size(); ++i) {
          cerr << *(bestModel[i]) << " ";
        }
        cerr << endl;
        cerr << "model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;

        // HOPE: nbest by model score + BLEU (BLEU weight +1.0); the top entry
        // serves as the oracle translation for this sentence.
        cerr << "Run decoder to get nbest hope translations" << endl;
        size_t oraclePos = featureValues[batchPosition].size();
        oraclePositions.push_back(oraclePos);
        vector<const Word*> oracle = decoder->getNBest(input,
                        *sid,
                        n,
                        1.0,
                        1.0,
                        featureValues[batchPosition],
                        bleuScores[batchPosition],
                        true,
                        distinctNbest);
        decoder->cleanup();
        oracles.push_back(oracle);
        for (size_t i = 0; i < oracle.size(); ++i) {
          //oracles[batchPosition].push_back(oracle[i]);
          cerr << *(oracle[i]) << " ";
        }
        cerr << endl;
        cerr << "oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;

        oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
        float oracleBleuScore = bleuScores[batchPosition][oraclePos];
        oracleBleuScores.push_back(oracleBleuScore);

        // FEAR: nbest by model score - BLEU (BLEU weight -1.0).
        cerr << "Run decoder to get nbest fear translations" << endl;
        size_t fearPos = featureValues[batchPosition].size();
        vector<const Word*> fear = decoder->getNBest(input,
                        *sid,
                        n,
                        -1.0,
                        1.0,
                        featureValues[batchPosition],
                        bleuScores[batchPosition],
                        true,
                        distinctNbest);
        decoder->cleanup();
        for (size_t i = 0; i < fear.size(); ++i) {
          cerr << *(fear[i]) << " ";
        }
        cerr << endl;
        cerr << "fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl;

        // Model and fear words are no longer needed; oracle words are kept
        // until after the history update below.
        for (size_t i = 0; i < bestModel.size(); ++i) {
          delete bestModel[i];
        }
        for (size_t i = 0; i < fear.size(); ++i) {
          delete fear[i];
        }

        // next input sentence
        ++sid;
        ++actualBatchSize;
        ++shardPosition;
      }

      // Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis)
      vector<vector<float> > losses(actualBatchSize);
      for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) {
        for (size_t j = 0; j < bleuScores[batchPosition].size(); ++j) {
          losses[batchPosition].push_back(oracleBleuScores[batchPosition] - bleuScores[batchPosition][j]);
        }
      }

      // get weight vector and set weight for bleu feature to 0
      // NOTE(review): assumes the BLEU feature is the LAST registered feature
      // function — verify this invariant holds for all configurations.
      ScoreComponentCollection mosesWeights = decoder->getWeights();
      const vector<const ScoreProducer*> featureFunctions = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT).GetFeatureFunctions();
      mosesWeights.Assign(featureFunctions.back(), 0);

      // SMO-style MIRA needs to know where each oracle sits in its nbest block.
      if (!hildreth && typeid(*optimiser) == typeid(MiraOptimiser)) {
        ((MiraOptimiser*)optimiser)->setOracleIndices(oraclePositions);
      }

      // run optimiser on batch
      cerr << "\nRun optimiser.." << endl;
      ScoreComponentCollection oldWeights(mosesWeights);
      int constraintChange = optimiser->updateWeights(mosesWeights, featureValues, losses, oracleFeatureValues);

      // update moses weights
      mosesWeights.L1Normalise();
      decoder->setWeights(mosesWeights);

      // update history (for approximate document bleu)
      decoder->updateHistory(oracles, inputLengths, ref_ids);

      // clean up oracle translations after updating history
      for (size_t i = 0; i < oracles.size(); ++i) {
        for (size_t j = 0; j < oracles[i].size(); ++j) {
          delete oracles[i][j];
        }
      }

      cumulativeWeights.PlusEquals(mosesWeights);

      // sanity check: compare margin created by old weights against new weights
      float lossMinusMargin_old = 0;
      float lossMinusMargin_new = 0;
      for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) {
        for (size_t j = 0; j < featureValues[batchPosition].size(); ++j) {
          ScoreComponentCollection featureDiff(oracleFeatureValues[batchPosition]);
          featureDiff.MinusEquals(featureValues[batchPosition][j]);

          // old weights
          float margin = featureDiff.InnerProduct(oldWeights);
          lossMinusMargin_old += (losses[batchPosition][j] - margin);

          // new weights
          margin = featureDiff.InnerProduct(mosesWeights);
          lossMinusMargin_new += (losses[batchPosition][j] - margin);
        }
      }

      cerr << "\nConstraint change: " << constraintChange << endl;
      cerr << "Summed (loss - margin) with old weights: " << lossMinusMargin_old << endl;
      cerr << "Summed (loss - margin) with new weights: " << lossMinusMargin_new << endl;
      if (lossMinusMargin_new > lossMinusMargin_old) {
        // The update should not increase total violation; warn if it did.
        cerr << "Worsening: " << lossMinusMargin_new - lossMinusMargin_old << endl;
        if (constraintChange < 0) {
          cerr << "Something is going wrong here.." << endl;
        }
      }

      ++iterations;
      ++iterationsThisEpoch;

      // mix weights?
      // NOTE(review): shard.size() / mixFrequency (and weightDumpFrequency
      // below) is integer division — zero if the frequency exceeds the shard
      // size, which would divide by zero. Verify expected configurations.
#ifdef MPI_ENABLE
      if (shardPosition % (shard.size() / mixFrequency) == 0) {
        ScoreComponentCollection averageWeights;
        VERBOSE(1, "\nRank: " << rank << "\nBefore mixing: " << mosesWeights << endl);

        // collect all weights in averageWeights and divide by number of processes
        mpi::reduce(world, mosesWeights, averageWeights, SCCPlus(), 0);
        if (rank == 0) {
          averageWeights.DivideEquals(size);
          VERBOSE(1, "After mixing: " << averageWeights << endl);

          // normalise weights after averaging
          averageWeights.L1Normalise();
        }

        // broadcast average weights from process 0
        mpi::broadcast(world, averageWeights, 0);
        decoder->setWeights(averageWeights);
      }
#endif

      // dump weights?
      if (shardPosition % (shard.size() / weightDumpFrequency) == 0) {
        // compute average weights per process over iterations
        ScoreComponentCollection totalWeights(cumulativeWeights);
        if (accumulateWeights)
          totalWeights.DivideEquals(iterations);
        else
          totalWeights.DivideEquals(iterationsThisEpoch);

        // average across processes
#ifdef MPI_ENABLE
        mpi::reduce(world, totalWeights, averageTotalWeights, SCCPlus(), 0);
        if (rank == 0) {
          // average and normalise weights
          averageTotalWeights.DivideEquals(size);
          averageTotalWeights.L1Normalise();
        }
#endif
#ifndef MPI_ENABLE
        // or use weights from single process
        averageTotalWeights = totalWeights;
#endif
        if (!weightDumpStem.empty()) {
          // Filename: <stem>_<epoch>[_<dumpIndex>] when dumping more than once per epoch.
          ostringstream filename;
          filename << weightDumpStem << "_" << epoch;
          if (weightDumpFrequency > 1) {
            filename << "_" << weightEpochDump;
          }

          VERBOSE(1, "Dumping weights for epoch " << epoch << " to " << filename.str() << endl);
          averageTotalWeights.Save(filename.str());
          ++weightEpochDump;
        }
      }
    }
  }

  /*#ifdef MPI_ENABLE
  mpi::finalize();
  #endif*/

  cerr << "Average total weights: " << averageTotalWeights << endl;

  now = time(0); // get current time
  tm = localtime(&now); // get struct filled out
  cerr << "End date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
       << ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec << endl;

  // NOTE(review): optimiser is never deleted — leaked at exit; harmless here
  // but worth cleaning up if main ever returns instead of exiting.
  delete decoder;
  exit(0);
}
2010-09-16 20:23:52 +04:00