Change BLEU smoothing, change the handling of multiple oracles, and add a parameter for increasing the brevity penalty (BP)

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3770 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
evahasler 2010-12-10 16:34:43 +00:00
parent cd62ffc021
commit 2f1b959302
8 changed files with 95 additions and 55 deletions

View File

@ -66,14 +66,26 @@ namespace Mira {
delete[] mosesargv;
}
-MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing)
+MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
: m_manager(NULL) {
// force initialisation of the phrase dictionary
const StaticData &staticData = StaticData::Instance();
// is this needed?
//m_sentence = new Sentence(Input);
//stringstream in("Initialising decoder..\n");
//const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
//m_sentence->Read(in,inputFactorOrder);
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
// is this needed?
//(TranslationSystem::DEFAULT);
//m_manager = new Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system);
//m_manager->ProcessSentence();
// Add the bleu feature
-m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
+m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
m_bleuScoreFeature->LoadReferences(refs);
}
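
The constructor now forwards a continuous BPfactor instead of the old boolean increaseBP switch. Below is a minimal sketch (not part of this changeset) of setting up the Bleu feature with the new argument; the wrapper function name and the concrete values are illustrative, and the usual Moses headers plus <vector>/<string> are assumed to be included. A BPfactor of 1.0 keeps the standard brevity penalty, while the old increaseBP=true behaviour corresponds to the previously hard-coded factor 1.1 (see the CalculateBleu changes further down).

// Illustrative sketch only: wiring the new float BPfactor through to the feature.
void setupBleuFeature(const std::vector<std::vector<std::string> >& refs) {
  BleuScoreFeature* bleu = new BleuScoreFeature(/*useScaledReference=*/ true,
                                                /*scaleByInputLength=*/ true,
                                                /*BPfactor=*/ 1.1,
                                                /*historySmoothing=*/ 0.9);
  bleu->LoadReferences(refs);  // one vector of reference strings per sentence
}
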

View File

@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc=0, char** a
**/
class MosesDecoder {
public:
-MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
+MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
//returns the best sentence
std::vector<const Moses::Word*> getNBest(const std::string& source,

View File

@ -92,9 +92,9 @@ int main(int argc, char** argv) {
float historySmoothing;
bool useScaledReference;
bool scaleByInputLength;
-bool increaseBP;
-bool regulariseHildrethUpdates;
-bool accumulateOracles;
+float BPfactor;
+float slack;
+size_t maxNumberOracles;
bool accumulateMostViolatedConstraints;
bool pastAndCurrentConstraints;
bool suppressConvergence;
@ -124,9 +124,9 @@ int main(int argc, char** argv) {
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("increase-BP", po::value<bool>(&increaseBP)->default_value(false), "Increase penalty for short translations")
("regularise-hildreth-updates", po::value<bool>(&regulariseHildrethUpdates)->default_value(false), "Regularise Hildreth updates with the value set for clipping")
("accumulate-oracles", po::value<bool>(&accumulateOracles)->default_value(false), "Accumulate oracle translations over epochs")
("BP-factor", po::value<float>(&BPfactor)->default_value(1.0), "Increase penalty for short translations")
("slack", po::value<float>(&slack)->default_value(0), "Use slack in optimization problem")
("max-number-oracles", po::value<size_t>(&maxNumberOracles)->default_value(1), "Set a maximum number of oracles to use per example")
("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false), "Accumulate most violated constraint per example")
("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
("suppress-convergence", po::value<bool>(&suppressConvergence)->default_value(false), "Suppress convergence, fixed number of epochs")
@ -184,7 +184,7 @@ int main(int argc, char** argv) {
// initialise Moses
initMoses(mosesConfigFile, verbosity);//, argc, argv);
-MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, increaseBP, historySmoothing);
+MosesDecoder* decoder = new MosesDecoder(referenceSentences, useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
ScoreComponentCollection startWeights = decoder->getWeights();
startWeights.L1Normalise();
decoder->setWeights(startWeights);
@ -222,16 +222,15 @@ int main(int argc, char** argv) {
cerr << "Nbest list size: " << n << endl;
cerr << "Distinct translations in nbest list? " << distinctNbest << endl;
cerr << "Batch size: " << batchSize << endl;
cerr << "Accumulate oracles? " << accumulateOracles << endl;
cerr << "Maximum number of oracles: " << maxNumberOracles << endl;
cerr << "Accumulate most violated constraints? " << accumulateMostViolatedConstraints << endl;
cerr << "Margin scale factor: " << marginScaleFactor << endl;
cerr << "Add only violated constraints? " << onlyViolatedConstraints << endl;
float slack = regulariseHildrethUpdates ? clipping : 0;
cerr << "Using slack? " << slack << endl;
cerr << "Increase BP? " << increaseBP << endl;
cerr << "BP factor: " << BPfactor << endl;
if (learner == "mira") {
cerr << "Optimising using Mira" << endl;
optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor, onlyViolatedConstraints, clipping, fixedClipping, regulariseHildrethUpdates, weightedLossFunction, accumulateOracles, accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor, onlyViolatedConstraints, clipping, fixedClipping, slack, weightedLossFunction, maxNumberOracles, accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
if (hildreth) {
cerr << "Using Hildreth's optimisation algorithm.." << endl;
}
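
Since the optimiser is now constructed with twelve positional arguments, here is the same call with each argument labelled, purely as a readability sketch; the comments are ours and the argument order follows the declaration in Optimiser.h shown below.

// Sketch: the new MiraOptimiser constructor call, arguments labelled.
optimiser = new MiraOptimiser(
    n,                                  // n-best list size
    hildreth,                           // solve the update with Hildreth's algorithm
    marginScaleFactor,
    onlyViolatedConstraints,
    clipping,
    fixedClipping,
    slack,                              // replaces bool regulariseHildrethUpdates
    weightedLossFunction,
    maxNumberOracles,                   // replaces bool accumulateOracles
    accumulateMostViolatedConstraints,
    pastAndCurrentConstraints,
    order.size());                      // number of training examples (sizes the per-sentence oracle lists)
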
@ -401,7 +400,7 @@ int main(int argc, char** argv) {
// run optimiser on batch
cerr << "\nRun optimiser.." << endl;
ScoreComponentCollection oldWeights(mosesWeights);
-int constraintChange = optimiser->updateWeights(mosesWeights, featureValues, losses, bleuScores, oracleFeatureValues, ref_ids);
+int constraintChange = optimiser->updateWeights(mosesWeights, featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids);
// update Moses weights
mosesWeights.L1Normalise();
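
For reference, the extended call now threads the oracle BLEU scores through to the optimiser alongside the oracle feature vectors. This is a restatement of the call above with the arguments labelled; the comments are ours.

int constraintChange = optimiser->updateWeights(
    mosesWeights,            // current weights, updated in place
    featureValues,           // n-best feature vectors per input sentence
    losses,                  // per-hypothesis losses
    bleuScores,              // per-hypothesis BLEU scores
    oracleFeatureValues,     // one oracle feature vector per input sentence
    oracleBleuScores,        // new: BLEU score of each oracle translation
    ref_ids);                // sentence ids of the batch
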

View File

@ -11,12 +11,45 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
const vector< vector<float> >& losses,
const vector<std::vector<float> >& bleuScores,
const vector< ScoreComponentCollection>& oracleFeatureValues,
+const vector< float> oracleBleuScores,
const vector< size_t> sentenceIds) {
-// add every oracle in batch to list of oracles
+// add every oracle in batch to list of oracles (under certain conditions)
for (size_t i = 0; i < oracleFeatureValues.size(); ++i) {
+float newWeightedScore = oracleFeatureValues[i].GetWeightedScore();
size_t sentenceId = sentenceIds[i];
-m_oracles[sentenceId].push_back(oracleFeatureValues[i]);
+// compare new oracle with existing oracles:
+// if same translation exists, just update the bleu score
+// if not, add the oracle
+bool updated = false;
+size_t indexOfWorst = 0;
+float worstWeightedScore = 0;
+for (size_t j = 0; j < m_oracles[sentenceId].size(); ++j) {
+float currentWeightedScore = m_oracles[sentenceId][j].GetWeightedScore();
+if (currentWeightedScore == newWeightedScore) {
+cerr << "updated.." << endl;
+m_bleu_of_oracles[sentenceId][j] = oracleBleuScores[i];
+updated = true;
+break;
+}
+else if (worstWeightedScore == 0 || currentWeightedScore > worstWeightedScore){
+worstWeightedScore = currentWeightedScore;
+indexOfWorst = j;
+}
+}
+if (!updated) {
+// add if number of maximum oracles not exceeded, otherwise override the worst
+if (m_max_number_oracles > m_oracles[sentenceId].size()) {
+m_oracles[sentenceId].push_back(oracleFeatureValues[i]);
+m_bleu_of_oracles[sentenceId].push_back(oracleBleuScores[i]);
+}
+else {
+m_oracles[sentenceId][indexOfWorst] = oracleFeatureValues[i];
+m_bleu_of_oracles[sentenceId][indexOfWorst] = oracleBleuScores[i];
+}
+}
}
if (m_hildreth) {
@ -38,6 +71,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
// iterate over all available oracles (1 if not accumulating, otherwise one per started epoch)
for (size_t k = 0; k < m_oracles[sentenceId].size(); ++k) {
cerr << "Oracle " << k << ": " << m_oracles[sentenceId][k] << " (BLEU: " << m_bleu_of_oracles[sentenceId][k] << ", model score: " << m_oracles[sentenceId][k].GetWeightedScore() << ")" << endl;
ScoreComponentCollection featureValueDiff = m_oracles[sentenceId][k];
featureValueDiff.MinusEquals(featureValues[i][j]);
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
@ -87,7 +121,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
}
-if (!m_accumulateOracles) {
+if (m_max_number_oracles == 1) {
for (size_t k = 0; k < sentenceIds.size(); ++k) {
size_t sentenceId = sentenceIds[k];
m_oracles[sentenceId].clear();
@ -101,8 +135,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
m_lossMarginDistances.push_back(maxViolationLossMarginDistance);
cerr << "Number of constraints passed to optimiser: " << m_featureValueDiffs.size() << endl;
-if (m_regulariseHildrethUpdates) {
-alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances, m_c);
+if (m_slack != 0) {
+alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances, m_slack);
}
else {
alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances);
@ -134,8 +168,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
//cerr << "Number of violated constraints before optimisation: " << violatedConstraintsBefore << endl;
cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
-if (m_regulariseHildrethUpdates) {
-alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances, m_c);
+if (m_slack != 0) {
+alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances, m_slack);
}
else {
alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances);
@ -251,7 +285,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
}
-if (!m_accumulateOracles) {
+if (m_max_number_oracles == 1) {
for (size_t k = 0; k < sentenceIds.size(); ++k) {
size_t sentenceId = sentenceIds[k];
m_oracles[sentenceId].clear();
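
The loop added above keeps at most max-number-oracles oracle translations per sentence, together with their BLEU scores. The following standalone sketch restates that policy with simplified types; the helper function and the Oracle struct are illustrative, not part of the Moses code.

#include <cstddef>
#include <vector>

// Sketch of the oracle bookkeeping introduced in this commit:
//  - an oracle with the same model score as the candidate is treated as the
//    same translation and only gets its BLEU score refreshed;
//  - otherwise the candidate is appended while fewer than maxOracles are
//    stored, and replaces the stored oracle with the highest ("worst")
//    model score once the cap is reached.
struct Oracle { float modelScore; float bleu; };

void updateOracles(std::vector<Oracle>& oracles, const Oracle& candidate,
                   std::size_t maxOracles) {
  std::size_t worst = 0;
  for (std::size_t j = 0; j < oracles.size(); ++j) {
    if (oracles[j].modelScore == candidate.modelScore) {
      oracles[j].bleu = candidate.bleu;        // same translation: refresh BLEU only
      return;
    }
    if (oracles[j].modelScore > oracles[worst].modelScore)
      worst = j;                               // remember the highest-scoring entry
  }
  if (oracles.size() < maxOracles)
    oracles.push_back(candidate);              // cap not reached yet
  else
    oracles[worst] = candidate;                // cap reached: overwrite the worst
}

With max-number-oracles left at its default of 1, the stored oracle is simply cleared after every update (as in the branch just above), which reproduces the previous non-accumulating behaviour.
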

View File

@ -34,6 +34,7 @@ namespace Mira {
const std::vector<std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy) = 0;
};
@ -44,6 +45,7 @@ namespace Mira {
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy)
{ return 0; }
};
@ -57,6 +59,7 @@ namespace Mira {
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy);
};
@ -65,7 +68,7 @@ namespace Mira {
MiraOptimiser() :
Optimiser() { }
-MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float clipping, bool fixedClipping, bool regulariseHildrethUpdates, bool weightedLossFunction, bool accumulateOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
+MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float clipping, bool fixedClipping, float slack, bool weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
Optimiser(),
m_n(n),
m_hildreth(hildreth),
@ -73,12 +76,13 @@ namespace Mira {
m_onlyViolatedConstraints(onlyViolatedConstraints),
m_c(clipping),
m_fixedClipping(fixedClipping),
-m_regulariseHildrethUpdates(regulariseHildrethUpdates),
+m_slack(slack),
m_weightedLossFunction(weightedLossFunction),
-m_accumulateOracles(accumulateOracles),
+m_max_number_oracles(maxNumberOracles),
m_accumulateMostViolatedConstraints(accumulateMostViolatedConstraints),
m_pastAndCurrentConstraints(pastAndCurrentConstraints),
-m_oracles(exampleSize) { }
+m_oracles(exampleSize),
+m_bleu_of_oracles(exampleSize) { }
~MiraOptimiser() {}
@ -87,6 +91,7 @@ namespace Mira {
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
+const std::vector< float> oracleBleuScores,
const std::vector< size_t> sentenceId);
float computeDelta(Moses::ScoreComponentCollection& currWeights,
const Moses::ScoreComponentCollection featureValuesDiff,
@ -120,7 +125,7 @@ namespace Mira {
bool m_fixedClipping;
// regularise Hildreth updates
-bool m_regulariseHildrethUpdates;
+float m_slack;
bool m_weightedLossFunction;
@ -130,7 +135,9 @@ namespace Mira {
// keep a list of oracle translations over epochs
std::vector < std::vector< Moses::ScoreComponentCollection> > m_oracles;
-bool m_accumulateOracles;
+std::vector < std::vector< float> > m_bleu_of_oracles;
+size_t m_max_number_oracles;
// accumulate most violated constraints for every example
std::vector< Moses::ScoreComponentCollection> m_featureValueDiffs;
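
The former regularise-hildreth-updates switch (which reused the clipping value) is replaced by an explicit slack parameter that is handed to Hildreth::optimise as the upper bound on the returned step sizes. Under the usual MIRA formulation this plays the role of the aggressiveness constant C; the following is a sketch of that assumed interpretation, in our own notation rather than anything taken from the code.

% Assumed interpretation of m_slack: the MIRA aggressiveness bound C.
\min_{\mathbf{w},\,\boldsymbol{\xi}\ge 0}\;
  \tfrac{1}{2}\,\lVert \mathbf{w}-\mathbf{w}_t\rVert^{2} + C\sum_j \xi_j
\quad\text{s.t.}\quad
\mathbf{w}^{\top}\!\bigl(\mathbf{f}_{\text{oracle}}-\mathbf{f}_j\bigr)\;\ge\;\ell_j-\xi_j .

The dual step sizes (the alphas returned by Hildreth::optimise) are then confined to [0, C] with C = slack; a slack of 0 falls back to the unconstrained two-argument call, as in the updateWeights changes above.
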

View File

@ -27,8 +27,9 @@ namespace Mira {
int Perceptron::updateWeights(ScoreComponentCollection& currWeights,
const vector< vector<ScoreComponentCollection> >& featureValues,
const vector< vector<float> >& losses,
-const vector<std::vector<float> >& bleuScores,
+const vector< vector<float> >& bleuScores,
const vector< ScoreComponentCollection>& oracleFeatureValues,
+const vector< float> oracleBleuScores,
const vector< size_t> dummy)
{
for (size_t i = 0; i < featureValues.size(); ++i) {

View File

@ -80,10 +80,10 @@ BleuScoreFeature::BleuScoreFeature():
m_ref_length_history(0),
m_use_scaled_reference(true),
m_scale_by_input_length(true),
-m_increase_BP(false),
+m_BP_factor(1.0),
m_historySmoothing(0.9) {}
-BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing):
+BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing):
StatefulFeatureFunction("BleuScore"),
m_count_history(BleuScoreState::bleu_order),
m_match_history(BleuScoreState::bleu_order),
@ -92,7 +92,7 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
m_ref_length_history(0),
m_use_scaled_reference(useScaledReference),
m_scale_by_input_length(scaleByInputLength),
-m_increase_BP(increaseBP),
+m_BP_factor(BPfactor),
m_historySmoothing(historySmoothing) {}
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
@ -317,8 +317,10 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_ngram_counts[i]) {
smoothed_matches = m_match_history[i] + state->m_ngram_matches[i];
smoothed_count = m_count_history[i] + state->m_ngram_counts[i];
-if (smoothed_matches == 0) {
-smoothed_matches = 0.0001;
+if (i > 0) {
+// smoothing for all n > 1
+smoothed_matches += 1;
+smoothed_count += 1;
}
precision *= smoothed_matches / smoothed_count;
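
The precision smoothing changes here: instead of clamping a zero match count to 0.0001, one is added to both matches and counts for every n-gram order above unigrams, on top of the exponentially smoothed history statistics. In formula form (our notation, not from the code), with orders that have no counts skipped as before:

% Add-one smoothing of the modified n-gram precisions for n > 1.
p_n \;=\; \frac{H^{\mathrm{match}}_n + m_n + 1}{H^{\mathrm{count}}_n + c_n + 1}
\quad (n > 1),
\qquad
p_1 \;=\; \frac{H^{\mathrm{match}}_1 + m_1}{H^{\mathrm{count}}_1 + c_1},

where m_n and c_n are the current state's n-gram matches and counts and H^match_n, H^count_n are the history values kept by the feature (decayed with the history-smoothing factor).
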
@ -337,12 +339,7 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_target_length < state->m_scaled_ref_length) {
float smoothed_target_length = m_target_length_history + state->m_target_length;
float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
-if (m_increase_BP) {
-precision *= exp(1 - ((1.1 * smoothed_ref_length)/ smoothed_target_length));
-}
-else{
-precision *= exp(1 - (smoothed_ref_length / smoothed_target_length));
-}
+precision *= exp(1 - ((m_BP_factor * smoothed_ref_length)/ smoothed_target_length));
}
}
else {
@ -351,12 +348,7 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_target_length < state->m_scaled_ref_length) {
float smoothed_target_length = m_target_length_history + state->m_target_length;
float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
-if (m_increase_BP) {
-precision *= exp(1 - ((1.1 * smoothed_ref_length)/ smoothed_target_length));
-}
-else{
-precision *= exp(1 - (smoothed_ref_length / smoothed_target_length));
-}
+precision *= exp(1 - ((m_BP_factor * smoothed_ref_length)/ smoothed_target_length));
}
}
else {
@ -364,12 +356,7 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (state->m_target_length < state->m_source_phrase_length) {
float smoothed_target_length = m_target_length_history + state->m_target_length;
float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
-if (m_increase_BP) {
-precision *= exp(1 - ((1.1 * smoothed_ref_length)/ smoothed_target_length));
-}
-else{
-precision *= exp(1 - (smoothed_ref_length / smoothed_target_length));
-}
+precision *= exp(1 - ((m_BP_factor * smoothed_ref_length)/ smoothed_target_length));
}
}
}
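
All three branches of the brevity penalty now share one expression, with the old hard-coded factor of 1.1 (used when increase-BP was set) generalised to the configurable BP-factor; its default of 1.0 reproduces the standard penalty. With the smoothed pseudo-lengths used above, the penalty applied when the target is shorter than the (scaled) reference is, in our notation:

% Brevity penalty with the configurable factor lambda = BP-factor.
\mathrm{BP} \;=\; \exp\!\Bigl(1 - \frac{\lambda\,\tilde r}{\tilde c}\Bigr),
\qquad
\tilde r = H^{\mathrm{ref}} + r_{\mathrm{scaled}},
\qquad
\tilde c = H^{\mathrm{tgt}} + c,

where r_scaled and c are the scaled reference length and target length of the current state and H^ref, H^tgt the corresponding length histories. Values of lambda above 1 make the exponent more negative and thus penalise short translations more strongly than standard BLEU.
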

View File

@ -45,7 +45,7 @@ typedef std::map< Phrase, size_t > NGrams;
class BleuScoreFeature : public StatefulFeatureFunction {
public:
BleuScoreFeature();
-BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, bool increaseBP, float historySmoothing);
+BleuScoreFeature(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
std::string GetScoreProducerDescription() const
{
@ -92,7 +92,7 @@ private:
bool m_scale_by_input_length;
// increase penalty for short translations
-bool m_increase_BP;
+float m_BP_factor;
float m_historySmoothing;