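Introduce parameter --update-after-epoch

When update_after_epoch is set, MiraOptimiser::updateWeights adds each batch's
update to m_accumulatedUpdates instead of applying it to the weight vector
immediately; getAccumulatedUpdates() and resetAccumulatedUpdates() expose and
clear that accumulator. Also removes DummyOptimiser, adds the rank to the
Decoder score log line, and reformats MiraOptimiser.cpp.
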
git-svn-id: http://svn.statmt.org/repository/mira@3845 cc96ff50-19ce-11e0-b349-13d7f0bd23df
2024-11-10 00:47:31 +03:00 · 2011-03-23 12:13:38 +00:00 · 2011-03-23 12:13:38 +00:00 · d04066ee0e
commit d04066ee0e
parent 41d2d28408
5 changed files with 995 additions and 893 deletions
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@ -101,7 +101,7 @@ namespace Mira {
                              vector< float>& bleuScores,
                              bool oracle,
                              bool distinct,
-							  size_t rank)
+                              size_t rank)
  {
  	StaticData &staticData = StaticData::InstanceNonConst();

@ -138,7 +138,7 @@ namespace Mira {

    	//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
    	float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
-    	cerr << "Total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
+    	cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;

    	Phrase bestPhrase = path.GetTargetPhrase();

--- a/mira/Main.cpp
+++ b/mira/Main.cpp
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@ -7,15 +7,13 @@ using namespace std;
 namespace Mira {

 int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
-		const vector< vector<ScoreComponentCollection> >& featureValues,
-		const vector< vector<float> >& losses,
-		const vector<std::vector<float> >& bleuScores,
-		const vector< ScoreComponentCollection>& oracleFeatureValues,
-		const vector< float> oracleBleuScores,
-		const vector< size_t> sentenceIds,
-		float learning_rate,
-		float max_sentence_update,
-		size_t rank) {
+    const vector<vector<ScoreComponentCollection> >& featureValues,
+    const vector<vector<float> >& losses,
+    const vector<std::vector<float> >& bleuScores, const vector<
+        ScoreComponentCollection>& oracleFeatureValues,
+    const vector<float> oracleBleuScores, const vector<size_t> sentenceIds,
+    float learning_rate, float max_sentence_update, size_t rank,
+    bool update_after_epoch) {

 	// add every oracle in batch to list of oracles (under certain conditions)
 	for (size_t i = 0; i < oracleFeatureValues.size(); ++i) {
@ -35,8 +33,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 				m_bleu_of_oracles[sentenceId][j] = oracleBleuScores[j];
 				updated = true;
 				break;
-			}
-			else if (worstWeightedScore == 0 || currentWeightedScore > worstWeightedScore){
+			} else if (worstWeightedScore == 0 || currentWeightedScore
+			    > worstWeightedScore) {
 				worstWeightedScore = currentWeightedScore;
 				indexOfWorst = j;
 			}
@ -47,8 +45,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 			if (m_max_number_oracles > m_oracles[sentenceId].size()) {
 				m_oracles[sentenceId].push_back(oracleFeatureValues[i]);
 				m_bleu_of_oracles[sentenceId].push_back(oracleBleuScores[i]);
-			}
-			else {
+			} else {
 				m_oracles[sentenceId][indexOfWorst] = oracleFeatureValues[i];
 				m_bleu_of_oracles[sentenceId][indexOfWorst] = oracleBleuScores[i];
 			}
@ -56,8 +53,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	}

 	size_t violatedConstraintsBefore = 0;
-	vector< ScoreComponentCollection> featureValueDiffs;
-	vector< float> lossMarginDistances;
+	vector<ScoreComponentCollection> featureValueDiffs;
+	vector<float> lossMarginDistances;

 	// find most violated constraint
 	float maxViolationLossMarginDistance;
@ -67,8 +64,9 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	for (size_t i = 0; i < featureValues.size(); ++i) {
 		size_t sentenceId = sentenceIds[i];
 		if (m_oracles[sentenceId].size() > 1)
-			cerr << "Available oracles for source sentence " << sentenceId << ": " << m_oracles[sentenceId].size() << endl;
-			for (size_t j = 0; j < featureValues[i].size(); ++j) {
+			cerr << "Available oracles for source sentence " << sentenceId << ": "
+			    << m_oracles[sentenceId].size() << endl;
+		for (size_t j = 0; j < featureValues[i].size(); ++j) {
 			// check if optimisation criterion is violated for one hypothesis and the oracle
 			// h(e*) >= h(e_ij) + loss(e_ij)
 			// h(e*) - h(e_ij) >= loss(e_ij)
@ -84,15 +82,15 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 					loss *= log10(bleuScores[i][j]);
 				}

-				cerr << "Rank " << rank << ", loss: " << loss << ", model score diff: " << modelScoreDiff << endl;
+				//cerr << "Rank " << rank << ", loss: " << loss << ", model score diff: "
+				//    << modelScoreDiff << endl;

 				bool addConstraint = true;
 				if (modelScoreDiff < loss) {
 					// constraint violated
 					++violatedConstraintsBefore;
 					oldDistanceFromOptimum += (loss - modelScoreDiff);
-				}
-				else if (m_onlyViolatedConstraints) {
+				} else if (m_onlyViolatedConstraints) {
 					// constraint not violated
 					addConstraint = false;
 				}
@ -100,13 +98,13 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 				if (addConstraint) {
 					float lossMarginDistance = loss - modelScoreDiff;

-					if (m_accumulateMostViolatedConstraints && !m_pastAndCurrentConstraints) {
+					if (m_accumulateMostViolatedConstraints
+					    && !m_pastAndCurrentConstraints) {
 						if (lossMarginDistance > maxViolationLossMarginDistance) {
 							maxViolationLossMarginDistance = lossMarginDistance;
 							maxViolationfeatureValueDiff = featureValueDiff;
 						}
-					}
-					else if (m_pastAndCurrentConstraints) {
+					} else if (m_pastAndCurrentConstraints) {
 						if (lossMarginDistance > maxViolationLossMarginDistance) {
 							maxViolationLossMarginDistance = lossMarginDistance;
 							maxViolationfeatureValueDiff = featureValueDiff;
@ -114,8 +112,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,

 						featureValueDiffs.push_back(featureValueDiff);
 						lossMarginDistances.push_back(lossMarginDistance);
-					}
-					else {
+					} else {
 						// Objective: 1/2 * ||w' - w||^2 + C * SUM_1_m[ max_1_n (l_ij - Delta_h_ij.w')]
 						// To add a constraint for the optimiser for each sentence i and hypothesis j, we need:
 						// 1. vector Delta_h_ij of the feature value differences (oracle - hypothesis)
@ -135,22 +132,24 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 		}
 	}

-	cerr << "Number of violated constraints before optimisation: " << violatedConstraintsBefore << endl;
+	cerr << "Number of violated constraints before optimisation: "
+	    << violatedConstraintsBefore << endl;
 	if (featureValueDiffs.size() != 30) {
-		cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
+		cerr << "Number of constraints passed to optimiser: "
+		    << featureValueDiffs.size() << endl;
 	}

 	// run optimisation: compute alphas for all given constraints
-	vector< float> alphas;
+	vector<float> alphas;
 	ScoreComponentCollection totalUpdate;
 	if (m_accumulateMostViolatedConstraints && !m_pastAndCurrentConstraints) {
 		m_featureValueDiffs.push_back(maxViolationfeatureValueDiff);
 		m_lossMarginDistances.push_back(maxViolationLossMarginDistance);

 		if (m_slack != 0) {
-			alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances, m_slack);
-		}
-		else {
+			alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances,
+			    m_slack);
+		} else {
 			alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances);
 		}

@ -164,8 +163,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 			// accumulate update
 			totalUpdate.PlusEquals(m_featureValueDiffs[k]);
 		}
-	}
-	else if (violatedConstraintsBefore > 0) {
+	} else if (violatedConstraintsBefore > 0) {
 		if (m_pastAndCurrentConstraints) {
 			// add all (most violated) past constraints to the list of current constraints
 			for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
@ -179,9 +177,9 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 		}

 		if (m_slack != 0) {
-			alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances, m_slack);
-		}
-		else {
+			alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances,
+			    m_slack);
+		} else {
 			alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances);
 		}

@ -195,59 +193,70 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 			// accumulate update
 			totalUpdate.PlusEquals(featureValueDiffs[k]);
 		}
-	}
-	else {
+	} else {
 		cerr << "No constraint violated for this batch" << endl;
 		return 0;
 	}

 	// apply learning rate (fixed or flexible)
 	if (learning_rate != 1) {
-		cerr << "Rank " << rank << ", update before applying learning rate: " << totalUpdate << endl;
+		cerr << "Rank " << rank << ", update before applying learning rate: "
+		    << totalUpdate << endl;
 		totalUpdate.MultiplyEquals(learning_rate);
-		cerr << "Rank " << rank << ", update after applying learning rate: " << totalUpdate << endl;
+		cerr << "Rank " << rank << ", update after applying learning rate: "
+		    << totalUpdate << endl;
 	}

 	// apply threshold scaling
 	if (max_sentence_update != -1) {
-		cerr << "Rank " << rank << ", update before scaling to max-sentence-update: " << totalUpdate << endl;
+		cerr << "Rank " << rank
+		    << ", update before scaling to max-sentence-update: " << totalUpdate
+		    << endl;
 		totalUpdate.ThresholdScaling(max_sentence_update);
-		cerr << "Rank " << rank << ", update after scaling to max-sentence-update: " << totalUpdate << endl;
+		cerr << "Rank " << rank
+		    << ", update after scaling to max-sentence-update: " << totalUpdate
+		    << endl;
 	}

-	// apply update to weight vector
-	cerr << "Rank " << rank << ", weights before update: " << currWeights << endl;
-	currWeights.PlusEquals(totalUpdate);
-	cerr << "Rank " << rank << ", weights after update: " << currWeights << endl;
+	if (update_after_epoch) {
+		m_accumulatedUpdates.PlusEquals(totalUpdate);
+		cerr << "Rank " << rank << ", new accumulated updates:" << m_accumulatedUpdates << endl;
+	} else {
+		// apply update to weight vector
+		cerr << "Rank " << rank << ", weights before update: " << currWeights
+		    << endl;
+		currWeights.PlusEquals(totalUpdate);
+		cerr << "Rank " << rank << ", weights after update: " << currWeights
+		    << endl;

-	// sanity check: how many constraints violated after optimisation?
-	size_t violatedConstraintsAfter = 0;
-	float newDistanceFromOptimum = 0;
-	for (size_t i = 0; i < featureValues.size(); ++i) {
-		for (size_t j = 0; j < featureValues[i].size(); ++j) {
-			ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
-			featureValueDiff.MinusEquals(featureValues[i][j]);
-			float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
-			float loss = losses[i][j] * m_marginScaleFactor;
-			if (modelScoreDiff < loss) {
-				++violatedConstraintsAfter;
-				newDistanceFromOptimum += (loss - modelScoreDiff);
+		// sanity check: how many constraints violated after optimisation?
+		size_t violatedConstraintsAfter = 0;
+		float newDistanceFromOptimum = 0;
+		for (size_t i = 0; i < featureValues.size(); ++i) {
+			for (size_t j = 0; j < featureValues[i].size(); ++j) {
+				ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
+				featureValueDiff.MinusEquals(featureValues[i][j]);
+				float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
+				float loss = losses[i][j] * m_marginScaleFactor;
+				if (modelScoreDiff < loss) {
+					++violatedConstraintsAfter;
+					newDistanceFromOptimum += (loss - modelScoreDiff);
+				}
 			}
 		}
-	}

-	int constraintChange = violatedConstraintsBefore - violatedConstraintsAfter;
-	cerr << "Rank " << rank << ", constraint change: " << constraintChange << " (before: " << violatedConstraintsBefore << ")" << endl;
-	float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
-	cerr << "Rank " << rank << ", distance change: " << distanceChange << endl;
-	if (constraintChange < 0 && distanceChange < 0) {
-		return -1;
+		int constraintChange = violatedConstraintsBefore - violatedConstraintsAfter;
+		cerr << "Rank " << rank << ", constraint change: " << constraintChange
+		    << " (before: " << violatedConstraintsBefore << ")" << endl;
+		float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
+		cerr << "Rank " << rank << ", distance change: " << distanceChange << endl;
+		if (constraintChange < 0 && distanceChange < 0) {
+			return -1;
+		}
 	}

 	return 0;
 }

-
 }

-
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@ -30,37 +30,21 @@ namespace Mira {
    public:
      Optimiser() {}
      virtual int updateWeights(Moses::ScoreComponentCollection& weights,
-                         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
-                         const std::vector<std::vector<float> >& losses,
-                         const std::vector<std::vector<float> >& bleuScores,
-                         const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
-                         const std::vector< float> oracleBleuScores,
-                         const std::vector< size_t> dummy,
-                         float learning_rate,
-                         float max_sentence_update,
-                         size_t rank) = 0;
-  };
- 
-  class DummyOptimiser : public Optimiser {
-    public:
-      virtual int updateWeights(Moses::ScoreComponentCollection& weights,
-                         const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
-                         const std::vector< std::vector<float> >& losses,
-                         const std::vector<std::vector<float> >& bleuScores,
-                         const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
-                         const std::vector< float> oracleBleuScores,
-                         const std::vector< size_t> dummy,
-                         float learning_rate,
-                         float max_sentence_update,
-                         size_t rank)
-                         { return 0; }
+            						  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
+            						  const std::vector< std::vector<float> >& losses,
+            						  const std::vector<std::vector<float> >& bleuScores,
+            						  const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
+            						  const std::vector< float> oracleBleuScores,
+            						  const std::vector< size_t> sentenceId,
+      										float learning_rate,
+      										float max_sentence_update,
+      										size_t rank,
+      										bool update_after_epoch) = 0;
  };
 
  class Perceptron : public Optimiser {
    public:
-       
-
-      virtual int updateWeights(Moses::ScoreComponentCollection& weights,
+			virtual int updateWeights(Moses::ScoreComponentCollection& weights,
                         const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
                         const std::vector< std::vector<float> >& losses,
                         const std::vector<std::vector<float> >& bleuScores,
@ -69,7 +53,8 @@ namespace Mira {
                         const std::vector< size_t> dummy,
                         float learning_rate,
                         float max_sentence_update,
-                         size_t rank);
+                         size_t rank,
+                         bool update_after_epoch);
  };

  class MiraOptimiser : public Optimiser {
@ -93,7 +78,7 @@ namespace Mira {

     ~MiraOptimiser() {}
   
-      virtual int updateWeights(Moses::ScoreComponentCollection& weights,
+     virtual int updateWeights(Moses::ScoreComponentCollection& weights,
      						  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
      						  const std::vector< std::vector<float> >& losses,
      						  const std::vector<std::vector<float> >& bleuScores,
@ -102,7 +87,8 @@ namespace Mira {
      						  const std::vector< size_t> sentenceId,
 										float learning_rate,
 										float max_sentence_update,
-										size_t rank);
+										size_t rank,
+										bool update_after_epoch);

      void setOracleIndices(std::vector<size_t> oracleIndices) {
    	  m_oracleIndices= oracleIndices;
@ -115,6 +101,14 @@ namespace Mira {
      void setMarginScaleFactor(float msf) {
      	m_marginScaleFactor = msf;
      }
+
+      Moses::ScoreComponentCollection getAccumulatedUpdates() {  // Returns m_accumulatedUpdates by value (a copy); updateWeights adds to that member when update_after_epoch is true.
+					return m_accumulatedUpdates;
+      }
+
+      void resetAccumulatedUpdates() {  // Clears the accumulator via ZeroAll(); presumably called once the accumulated updates have been applied -- TODO confirm against the caller in Main.cpp.
+      	m_accumulatedUpdates.ZeroAll();
+      }
  
   private:
      // number of hypotheses used for each nbest list (number of hope, fear, best model translations)
@ -151,6 +145,8 @@ namespace Mira {
      bool m_accumulateMostViolatedConstraints;

      bool m_pastAndCurrentConstraints;
+
+      Moses::ScoreComponentCollection m_accumulatedUpdates;
  };
 }

--- a/mira/Perceptron.cpp
+++ b/mira/Perceptron.cpp
@ -33,7 +33,8 @@ int Perceptron::updateWeights(ScoreComponentCollection& currWeights,
 		const vector< size_t> dummy,
 		float learning_rate,
 		float max_sentence_update,
-		size_t rank)
+		size_t rank,
+		bool update_after_epoch)
 {
 	for (size_t i = 0; i < featureValues.size(); ++i) {
 		for (size_t j = 0; j < featureValues[i].size(); ++j) {