add parameter --delay-updates

Eva Hasler 2012-01-14 15:56:16 +00:00
parent 1551f44879
commit a050992abd
4 changed files with 50 additions and 19 deletions
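In outline: with --delay-updates, the per-batch weight update computed by the optimiser is added to an accumulator (delayedWeightUpdates) instead of being pushed to the decoder immediately, and the accumulated sum is applied once at the end of each epoch. A minimal standalone sketch of that control flow, with toy types and a dummy optimiser step in place of the Moses classes (it also glosses over the split between the trainer's local weights and the decoder's weights):

    #include <cstddef>
    #include <vector>

    // Toy stand-in for Moses' ScoreComponentCollection: a flat weight vector
    // with the two operations the training loop needs.
    struct WeightVector {
      std::vector<float> v;
      explicit WeightVector(std::size_t n) : v(n, 0.0f) {}
      void PlusEquals(const WeightVector& other) {
        for (std::size_t i = 0; i < v.size(); ++i) v[i] += other.v[i];
      }
      void ZeroAll() {
        for (std::size_t i = 0; i < v.size(); ++i) v[i] = 0.0f;
      }
    };

    // Dummy optimiser step: writes the proposed update into weightUpdate
    // instead of modifying the current weights (as this commit arranges).
    void computeUpdate(const WeightVector& current, WeightVector& weightUpdate) {
      for (std::size_t i = 0; i < weightUpdate.v.size(); ++i)
        weightUpdate.v[i] = 0.01f;  // placeholder; the real code solves a MIRA problem
    }

    void train(WeightVector& weights, std::size_t epochs, std::size_t batches,
               bool delayUpdates) {
      for (std::size_t epoch = 0; epoch < epochs; ++epoch) {
        WeightVector delayedWeightUpdates(weights.v.size());
        delayedWeightUpdates.ZeroAll();
        for (std::size_t batch = 0; batch < batches; ++batch) {
          WeightVector weightUpdate(weights.v.size());
          computeUpdate(weights, weightUpdate);
          if (delayUpdates)
            delayedWeightUpdates.PlusEquals(weightUpdate);  // collect for later
          else
            weights.PlusEquals(weightUpdate);               // apply right away
        }
        if (delayUpdates)
          weights.PlusEquals(delayedWeightUpdates);         // one update per epoch
      }
    }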

View File

@ -121,6 +121,7 @@ int main(int argc, char** argv) {
float max_length_dev_reference;
float relax_BP;
bool stabiliseLength;
+bool delayUpdates;
po::options_description desc("Allowed options");
desc.add_options()
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
@ -135,6 +136,7 @@ int main(int argc, char** argv) {
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
("delay-updates", po::value<bool>(&delayUpdates)->default_value(false), "Delay all updates until the end of an epoch")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
@ -413,6 +415,9 @@ int main(int argc, char** argv) {
// when length ratio >= 1, set this to true
bool fixLength = false;
+// for accumulating delayed updates
+ScoreComponentCollection delayedWeightUpdates;
bool stop = false;
// int sumStillViolatedConstraints;
float *sendbuf, *recvbuf;
@ -432,8 +437,10 @@ int main(int argc, char** argv) {
size_t weightEpochDump = 0;
// sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch
-size_t dev_hypothesis_length;
-size_t dev_reference_length;
+size_t dev_hypothesis_length = 0;
+size_t dev_reference_length = 0;
+delayedWeightUpdates.ZeroAll();
size_t shardPosition = 0;
vector<size_t>::const_iterator sid = shard.begin();
@ -682,24 +689,28 @@ int main(int argc, char** argv) {
// Run optimiser on batch:
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
size_t update_status;
+ScoreComponentCollection weightUpdate;
if (perceptron_update) {
vector<vector<float> > dummy1;
-update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
featureValuesHope, featureValuesFear, dummy1, dummy1, learning_rate, rank, epoch);
}
else if (hope_fear) {
-update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, learning_rate, rank, epoch);
}
else {
// model_hope_fear
-update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights,
+update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, weightUpdate,
featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, learning_rate, rank, epoch);
}
// sumStillViolatedConstraints += update_status;
if (update_status == 0) { // if weights were updated
+// apply weight update
+mosesWeights.PlusEquals(weightUpdate);
if (normaliseWeights) {
mosesWeights.L1Normalise();
}
@ -718,8 +729,11 @@ int main(int argc, char** argv) {
mosesWeights = averageWeights;
}
-// set new Moses weights
-decoder->setWeights(mosesWeights);
+if (delayUpdates)
+delayedWeightUpdates.PlusEquals(weightUpdate);
+else
+// set new Moses weights
+decoder->setWeights(mosesWeights);
}
// update history (for approximate document Bleu)
@ -830,8 +844,17 @@ int main(int argc, char** argv) {
}
}
}// end dumping
} // end of shard loop, end of this epoch
+if (delayUpdates) {
+// apply all updates from this epoch to the weight vector
+ScoreComponentCollection mosesWeights = decoder->getWeights();
+mosesWeights.PlusEquals(delayedWeightUpdates);
+decoder->setWeights(mosesWeights);
+cerr << "Rank " << rank << ", epoch " << epoch << ", delayed update, new moses weights: " << mosesWeights << endl;
+}
if (stabiliseLength && !fixLength) {
float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
if (lengthRatio >= 1) {

View File

@ -7,7 +7,9 @@ using namespace std;
namespace Mira {
-size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeights(
+ScoreComponentCollection& currWeights,
+ScoreComponentCollection& weightUpdate,
const vector<vector<ScoreComponentCollection> >& featureValues,
const vector<vector<float> >& losses,
const vector<vector<float> >& bleuScores,
@ -142,9 +144,7 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
// apply update to weight vector
-currWeights.PlusEquals(summedUpdate);
+weightUpdate.PlusEquals(summedUpdate);
// Sanity check: are there still violated constraints after optimisation?
/* int violatedConstraintsAfter = 0;
@ -164,7 +164,9 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
return 0;
}
-size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeightsHopeFear(
+Moses::ScoreComponentCollection& currWeights,
+Moses::ScoreComponentCollection& weightUpdate,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
@ -299,9 +301,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& cur
}
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
// apply update to weight vector
-currWeights.PlusEquals(summedUpdate);
+weightUpdate.PlusEquals(summedUpdate);
// Sanity check: are there still violated constraints after optimisation?
/* int violatedConstraintsAfter = 0;
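Both optimiser methods now leave currWeights untouched and add the computed summedUpdate to the new weightUpdate out-parameter, so the caller in main can either apply it immediately or fold it into delayedWeightUpdates. For orientation only, the single-constraint MIRA (passive-aggressive style) step that summedUpdate generalises, as a self-contained sketch; the actual Moses optimiser handles several hope/fear constraints per batch and a margin slack, which this deliberately omits:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Single-constraint MIRA / PA-I step (orientation only):
    //   alpha  = min(C, max(0, loss - w.(phi_hope - phi_fear)) / ||phi_hope - phi_fear||^2)
    //   update = alpha * (phi_hope - phi_fear)
    std::vector<float> miraStep(const std::vector<float>& weights,
                                const std::vector<float>& hopeFeatures,
                                const std::vector<float>& fearFeatures,
                                float loss,  // e.g. BLEU(hope) - BLEU(fear)
                                float C) {   // cap on the step size
      std::size_t n = weights.size();
      std::vector<float> diff(n);
      float margin = 0.0f, sqNorm = 0.0f;
      for (std::size_t i = 0; i < n; ++i) {
        diff[i] = hopeFeatures[i] - fearFeatures[i];
        margin += weights[i] * diff[i];
        sqNorm += diff[i] * diff[i];
      }
      float alpha = 0.0f;
      if (sqNorm > 0.0f)
        alpha = std::min(C, std::max(0.0f, loss - margin) / sqNorm);
      std::vector<float> update(n);
      for (std::size_t i = 0; i < n; ++i)
        update[i] = alpha * diff[i];  // what the caller adds to weightUpdate
      return update;
    }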

View File

@ -30,7 +30,9 @@ namespace Mira {
public:
Optimiser() {}
-virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+virtual size_t updateWeightsHopeFear(
+Moses::ScoreComponentCollection& currWeights,
+Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
@ -42,7 +44,9 @@ namespace Mira {
class Perceptron : public Optimiser {
public:
-virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+virtual size_t updateWeightsHopeFear(
+Moses::ScoreComponentCollection& currWeights,
+Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
@ -66,6 +70,7 @@ namespace Mira {
m_margin_slack(margin_slack) { }
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
+Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector<std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
@ -75,6 +80,7 @@ namespace Mira {
size_t rank,
size_t epoch);
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+Moses::ScoreComponentCollection& weightUpdate,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,

View File

@ -24,7 +24,9 @@ using namespace std;
namespace Mira {
-size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
+size_t Perceptron::updateWeightsHopeFear(
+ScoreComponentCollection& currWeights,
+ScoreComponentCollection& weightUpdate,
const vector< vector<ScoreComponentCollection> >& featureValuesHope,
const vector< vector<ScoreComponentCollection> >& featureValuesFear,
const vector< vector<float> >& dummy1,
@ -39,7 +41,7 @@ size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
featureValueDiff.MinusEquals(featureValuesFear[0][0]);
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
featureValueDiff.MultiplyEquals(perceptron_learning_rate);
-currWeights.PlusEquals(featureValueDiff);
+weightUpdate.PlusEquals(featureValueDiff);
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
return 0;
}
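The perceptron variant reduces to one line of arithmetic: scale the hope-minus-fear feature difference by the learning rate and, after this commit, add it to weightUpdate rather than directly to the current weights. The same computation on plain vectors, as a self-contained sketch:

    #include <cstddef>
    #include <vector>

    // Perceptron-style hope/fear step: weightUpdate += eta * (phi(hope) - phi(fear)).
    // Plain std::vector<float> stands in for Moses' ScoreComponentCollection.
    void perceptronUpdate(std::vector<float>& weightUpdate,
                          const std::vector<float>& hopeFeatures,
                          const std::vector<float>& fearFeatures,
                          float learningRate) {
      for (std::size_t i = 0; i < weightUpdate.size(); ++i)
        weightUpdate[i] += learningRate * (hopeFeatures[i] - fearFeatures[i]);
    }

Under --delay-updates these per-sentence steps are summed and applied once per epoch, effectively turning the online perceptron into a once-per-epoch batch update.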