Update mira optimization code and merge Main.cpp

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3652 1f5c12ca-751b-0410-a591-d2e778427230
2024-09-20 07:42:21 +03:00 · 2010-10-25 12:22:35 +00:00 · 2010-10-25 12:22:35 +00:00 · 85a71793a6
commit 85a71793a6
parent 1bd263c4ec
5 changed files with 377 additions and 187 deletions
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@ -101,7 +101,7 @@ namespace Mira {
                              vector< ScoreComponentCollection>& featureValues,
                              vector< float>& bleuScores  )
  {
-    StaticData &staticData = StaticData::InstanceNonConst();
+	StaticData &staticData = StaticData::InstanceNonConst();

 	m_sentence = new Sentence(Input);
    stringstream in(source + "\n");
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@ -77,9 +77,11 @@ int main(int argc, char** argv) {
  vector<string> referenceFiles;
  size_t epochs;
  string learner;
-  bool shuffle = true;
+  bool shuffle = true;	 // TODO: parameterize?
  size_t mixFrequency;
  size_t weightDumpFrequency;
+  size_t clippingScheme;
+  float lowerBound, upperBound;
  po::options_description desc("Allowed options");
  desc.add_options()
        ("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
@ -90,7 +92,11 @@ int main(int argc, char** argv) {
        ("epochs,e", po::value<size_t>(&epochs)->default_value(1), "Number of epochs")
        ("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
        ("mix-frequency", po::value<size_t>(&mixFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
-        ("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights");
+        ("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights")
+        ("clipping-scheme,c", po::value<size_t>(&clippingScheme)->default_value(1), "Select clipping scheme for weight updates (1: equal 2: varied")
+        ("lower-bound,lb", po::value<float>(&lowerBound)->default_value(-0.01), "Lower bound for mira clipping scheme")
+        ("upper-bound,ub", po::value<float>(&upperBound)->default_value(0.01), "Upper bound for mira clipping scheme");
+

  po::options_description cmdline_options;
  cmdline_options.add(desc);
@ -122,22 +128,6 @@ int main(int argc, char** argv) {
    return 1;
  }

-  //FIXME: Make these configurable
-  float miraLowerBound = 0;
-  float miraUpperBound = 1;
-
-  Optimiser* optimiser = NULL;
-  if (learner == "mira") {
-    cerr << "Optimising using Mira" << endl;
-    optimiser = new MiraOptimiser(miraLowerBound, miraUpperBound);
-  } else if (learner == "perceptron") {
-    cerr << "Optimising using Perceptron" << endl;
-    optimiser = new Perceptron();
-  } else {
-    cerr << "Error: Unknown optimiser: " << learner << endl;
-  }
-
-
  //load input and references 
  vector<string> inputSentences;
  if (!loadSentences(inputFile, inputSentences)) {
@ -158,9 +148,25 @@ int main(int argc, char** argv) {
      return 1;
    }
  }
+
  //initialise moses
  initMoses(mosesConfigFile, verbosity);//, argc, argv);
  MosesDecoder* decoder = new MosesDecoder(referenceSentences) ;
+  ScoreComponentCollection startWeights = decoder->getWeights();
+
+  // print feature function and weights
+  // TODO: scaling of feature functions
+  // TODO: initialise weights equally
+  const vector<const ScoreProducer*> featureFunctions = StaticData::Instance().GetTranslationSystem (TranslationSystem::DEFAULT).GetFeatureFunctions();
+  for (size_t i = 0; i < featureFunctions.size(); ++i) {
+	  cout << "Feature functions: " << featureFunctions[i]->GetScoreProducerDescription() << ": " << featureFunctions[i]->GetNumScoreComponents() << endl;
+	  vector< float> weights = startWeights.GetScoresForProducer(featureFunctions[i]);
+	  cout << "weights: ";
+	  for (size_t j = 0; j < weights.size(); ++j) {
+		  cout << weights[j];
+	  }
+	  cout << endl;
+  }

  //Optionally shuffle the sentences
  vector<size_t> order;
@ -189,120 +195,174 @@ int main(int argc, char** argv) {
  shard.resize(shardSize);
  copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());

-  
+  Optimiser* optimiser = NULL;
+  size_t n = 10;								// size of n-best lists
+  if (learner == "mira") {
+    cerr << "Optimising using Mira" << endl;
+    optimiser = new MiraOptimiser(n, clippingScheme, lowerBound, upperBound);
+  } else if (learner == "perceptron") {
+    cerr << "Optimising using Perceptron" << endl;
+    optimiser = new Perceptron();
+  } else {
+    cerr << "Error: Unknown optimiser: " << learner << endl;
+  }

  //Main loop:
-  ScoreComponentCollection cumulativeWeights;
-  size_t modelHypoCount = 10;
-  size_t hopeHypoCount = 10;
-  size_t fearHypoCount = 10;
+  ScoreComponentCollection cumulativeWeights;		// collect weights per epoch to produce an average
  size_t iterations = 0;
+  size_t epoch = 0;
+
+  time_t now = time(0); // get current time
+  struct tm* tm = localtime(&now); // get struct filled out
+  cout << "Start date/time: " << tm->tm_mon+1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
+		    << ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec << endl;
  
-	
-  for (size_t epoch = 0; epoch < epochs; ++epoch) {
-    //TODO: batching
-    size_t shardPosition = 0;
-    for (vector<size_t>::const_iterator sid = shard.begin();
-       sid != shard.end(); ++sid) {
-      const string& input = inputSentences[*sid];
-      const vector<string>& refs = referenceSentences[*sid];
+  // TODO: stop MIRA when score on dev or tuning set does not improve further?
+  for (size_t epoch = 1; epoch <= epochs; ++epoch) {

-      vector<vector<ScoreComponentCollection > > allScores(1);
-      vector<vector<float> > allLosses(1);
+	  cout << "\nEpoch " << epoch << std::endl;
+	  cumulativeWeights.ZeroAll();

+	  // compute sum in objective function after each epoch
+	  float maxSum = 0.0;

-			// MODEL
-      decoder->getNBest(input,
+	  //TODO: batching
+	  size_t batchSize = 1;
+	  size_t batch = 0;
+	  size_t shardPosition = 0;
+	  for (vector<size_t>::const_iterator sid = shard.begin(); sid != shard.end(); ++sid) {
+		  const string& input = inputSentences[*sid];
+		  const vector<string>& refs = referenceSentences[*sid];
+		  cout << "Input sentence " << *sid << ": \"" << input << "\"" << std::endl;
+
+		  // feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues)
+		  vector<vector<ScoreComponentCollection > > featureValues(batchSize);
+		  vector<vector<float> > bleuScores(batchSize);
+
+		  // MODEL
+		  cout << "Run decoder to get nbest wrt model score" << std::endl;
+		  decoder->getNBest(input,
                        *sid,
-                        modelHypoCount,
+                        n,
                        0.0,
                        1.0,
-                        allScores[0],
-                        allLosses[0]);
+                        featureValues[batch],
+                        bleuScores[batch]);

-
-			// HOPE
-      size_t oraclePos = allScores.size();
-      vector<const Word*> oracle =
-         decoder->getNBest(input,
-											 *sid,
-												modelHypoCount,
+		  // HOPE
+		  cout << "Run decoder to get nbest hope translations" << std::endl;
+		  size_t oraclePos = featureValues[batch].size();
+		  vector<const Word*> oracle = decoder->getNBest(input,
+						*sid,
+						n,
                        1.0,
                        1.0,
-                        allScores[0],
-                        allLosses[0]);
+                        featureValues[batch],
+                        bleuScores[batch]);

-      ScoreComponentCollection oracleScores = allScores[0][oraclePos];
-      float oracleLoss = allLosses[0][oraclePos];
+		  ScoreComponentCollection oracleFeatureValues = featureValues[batch][oraclePos];
+		  float oracleBleuScore = bleuScores[batch][oraclePos];
 			
-			// FEAR
-      decoder->getNBest(input,
+		  // FEAR
+		  cout << "Run decoder to get nbest fear translations" << std::endl;
+		  decoder->getNBest(input,
                        *sid,
-                        modelHypoCount,
+                        n,
                        -1.0,
                        1.0,
-                        allScores[0],
-                        allLosses[0]);
+                        featureValues[batch],
+                        bleuScores[batch]);

-      //set loss for each sentence as oracleloss - rawsentenceloss
-      for (size_t i = 0; i < allScores.size(); ++i) {
-        for (size_t j = 0; j < allScores[i].size(); ++j) {
-          allLosses[i][j] = oracleLoss - allLosses[i][j];
-        }
-      }
+	      // Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis)
+	      vector< vector<float> > losses(batchSize);
+	      for (size_t i = 0; i < batchSize; ++i) {
+	    	  for (size_t j = 0; j < bleuScores[i].size(); ++j) {
+	    		  losses[i].push_back(oracleBleuScore - bleuScores[i][j]);
+	    		  //cout << "loss[" << i << "," << j << "]" << endl;
+	    	  }
+	      }

+	      // get weight vector and set weight for bleu feature to 0
+	      ScoreComponentCollection mosesWeights = decoder->getWeights();
+	      const vector<const ScoreProducer*> featureFunctions = StaticData::Instance().GetTranslationSystem (TranslationSystem::DEFAULT).GetFeatureFunctions();
+	      mosesWeights.Assign(featureFunctions.back(), 0);
+	      ScoreComponentCollection oldWeights(mosesWeights);
 			
-      //run optimiser
-		  ScoreComponentCollection mosesWeights = decoder->getWeights();	
-			optimiser->updateWeights(mosesWeights
-															, allScores
-															, allLosses
-															, oracleScores);
-			
-      //update moses weights
-      mosesWeights.L1Normalise();
-      decoder->setWeights(mosesWeights);
+		  //run optimiser
+	      cout << "Run optimiser.." << endl;
+	      optimiser->updateWeights(mosesWeights, featureValues, losses, oracleFeatureValues);
+
+		  //update moses weights
+	      mosesWeights.L1Normalise();
+		  decoder->setWeights(mosesWeights);
  
-      //history (for approx doc bleu)
-      decoder->updateHistory(oracle);
-      cumulativeWeights.PlusEquals(mosesWeights);
-			decoder->cleanup();
+		  //history (for approx doc bleu)
+		  decoder->updateHistory(oracle);

-      ++shardPosition;
-      ++iterations;
+		  cumulativeWeights.PlusEquals(mosesWeights);
+		  decoder->cleanup();

-      //mix weights?
+	      // Compute objective for all hypotheses of a training source sentence
+	      // add max(l_ij - Delta_ij * w') for check on objective
+	      float maxDiff = 0.0;
+	      for (size_t j = 0; j < 3*n; ++j) {
+	    	  ScoreComponentCollection featureDiff(oracleFeatureValues);
+	    	  featureDiff.MinusEquals(featureValues[batch][j]);
+	    	  float tmpMaxDiff = losses[batch][j] - featureDiff.InnerProduct(mosesWeights);
+	    	  if (tmpMaxDiff > maxDiff) {
+	    		  maxDiff = tmpMaxDiff;
+	    	  }
+	      }
+
+	      maxSum += maxDiff;
+
+		  ++shardPosition;
+		  ++iterations;
+
+		  //mix weights?
 #ifdef MPI_ENABLE
-      if (shardPosition % (shard.size() / mixFrequency) == 0) {
-        ScoreComponentCollection averageWeights;
-        VERBOSE(1, "Rank: " << rank << "Before mixing: " << mosesWeights << endl);
-        mpi::reduce(world,mosesWeights,averageWeights,SCCPlus(),0);
-        if (rank == 0) {
-          averageWeights.MultiplyEquals(1.0f/size);
-          VERBOSE(1, "After mixing: " << averageWeights << endl);
-        }
-        mpi::broadcast(world,averageWeights,0);
-        decoder->setWeights(averageWeights);
-      }
+		  if (shardPosition % (shard.size() / mixFrequency) == 0) {
+			  ScoreComponentCollection averageWeights;
+			  VERBOSE(1, "Rank: " << rank << "Before mixing: " << mosesWeights << endl);
+			  mpi::reduce(world,mosesWeights,averageWeights,SCCPlus(),0);
+			  if (rank == 0) {
+				  averageWeights.MultiplyEquals(1.0f/size);
+				  VERBOSE(1, "After mixing: " << averageWeights << endl);
+			  }
+
+			  mpi::broadcast(world,averageWeights,0);
+			  decoder->setWeights(averageWeights);
+		  }
 #endif

-      //dump weights?
-      if (shardPosition % (shard.size() / weightDumpFrequency) == 0) {
-        ScoreComponentCollection totalWeights(cumulativeWeights);
+		  //dump weights?
+		  if (shardPosition % (shard.size() / weightDumpFrequency) == 0) {
+			  ScoreComponentCollection totalWeights(cumulativeWeights);
 #ifdef MPI_ENABLE
-      //average across processes
-      mpi::reduce(world,cumulativeWeights,totalWeights,SCCPlus(),0);
+			  //average across processes
+			  mpi::reduce(world,cumulativeWeights,totalWeights,SCCPlus(),0);
 #endif
-        if (rank == 0) {
-          cout << "WEIGHTS " << iterations << " ";
-          totalWeights.L1Normalise();
-          cout << totalWeights << endl;
-        }
-      }
-    }
+			  if (rank == 0) {
+				  cout << "Total weights (" << iterations << ") ";
+				  totalWeights.L1Normalise();
+				  cout << totalWeights << endl;
+			  }
+		  }
+	  }
+
+	  // how has the objective function changed?
+	  cout << "objective = " << maxSum << endl;
  }
-  

+  // take average of cumulative weights of last pass over all source sentences
+  cumulativeWeights.MultiplyEquals(1.0f/inputSentences.size());
+  
+  cerr << "Start weights: " << startWeights << endl;
+  cerr << "Averaged new weights: " << cumulativeWeights << endl;
+
+  tm = localtime(&now); // get struct filled out
+  cout << "End date/time: " << tm->tm_mon+1 << "/" << tm->tm_mday << "/" << tm->tm_year + 1900
+		    << ", " << tm->tm_hour << ":" << tm->tm_min << ":" << tm->tm_sec;

  exit(0);
 }
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@ -4,75 +4,187 @@ using namespace Moses;
 using namespace std;

 namespace Mira {
-	void MiraOptimiser::updateWeights(Moses::ScoreComponentCollection& weights,
-                         	const std::vector< std::vector<Moses::ScoreComponentCollection> >& scores,
-                         	const std::vector< std::vector<float> >& losses,
-                         	const Moses::ScoreComponentCollection& oracleScores) {

-	for(unsigned batch = 0; batch < scores.size(); batch++) {
+void MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
+		const vector< vector<ScoreComponentCollection> >& featureValues,
+		const vector< vector<float> >& losses,
+		const ScoreComponentCollection& oracleFeatureValues) {

-	  Moses::ScoreComponentCollection oldWeights(weights);
-	  float maxTranslation = -1000.0; //what wrong with FLT_MIN ?!
+	// TODO: do we need the oracle feature values?

-	  for(unsigned analyseSentence = 0; analyseSentence < scores[batch].size(); analyseSentence++) {
+	size_t numberOfUpdates = 0;

-            /* do this:
-            for(unsigned score = 0; score < scores[batch][analyseSentence].size(); score++) {
-              float currentScoreChange = oracleScores[score] - scores[batch][analyseSentence][score];
-              scoreChange += currentScoreChange * weights[score];
-              norm += currentScoreChange * currentScoreChange;
-            }
-            */ 
-            Moses::ScoreComponentCollection currentScoreColl = oracleScores;
-            currentScoreColl.MinusEquals(scores[batch][analyseSentence]);
-	    currentScoreColl.MultiplyEquals(weights);
-	    float scoreChange = currentScoreColl.InnerProduct(weights);
-	    float norm = currentScoreColl.InnerProduct(currentScoreColl);	     
+	cout << "Selected clipping scheme: " << m_clippingScheme << endl;
+	cout << "lower bound: " << m_lowerBound << endl;
+	cout << "upper bound: " << m_upperBound << endl;

-            float delta;
-            if(norm == 0.0) //just in case... :-)
-              delta = 0.0;
-            else {
-              
-              delta = (losses[batch][analyseSentence] - scoreChange) / norm;
+	vector< float> alphas(3*m_n);
+	for(size_t batch = 0; batch < featureValues.size(); batch++) {
+		if (m_clippingScheme == 2) {
+			// initialise alphas for each source (alpha for oracle translation = C, all other alphas = 0)
+			for (size_t j = 0; j < 3*m_n; ++j) {
+				if (j == m_n) {
+					// oracle
+					alphas[j] = m_upperBound;
+					std::cout << "alpha " << j << ": " << alphas[j] << endl;
+				}
+				else {
+					alphas[j] = 0;
+					std::cout << "alpha " << j << ": " << alphas[j] << endl;
+				}
+			}
+		}

-	      //now get in shape
-              if(delta > upperBound_)
-                delta = upperBound_;
-              else if(delta < lowerBound_)
-                delta = lowerBound_;
-	
-              cout << "scoreChange: " << scoreChange
-                   << "\ndelta: " << delta
-                   << "\nloss: " << losses[batch][analyseSentence] << endl;
-            }
-           
- 	    // do this:	weights += delta * (oracleScores - scores[batch][analyseSentence])
-            Moses::ScoreComponentCollection tempColl = oracleScores;
-            tempColl.MinusEquals(scores[batch][analyseSentence]);
-	    tempColl.MultiplyEquals(delta);
-	    weights.MinusEquals(tempColl);
+		// iterate over nbest lists of translations, feature list contains n*model, n*hope, n*fear)
+		// Combinations for j and j': hope/fear, hope/model, model/fear?
+		// Currently we compare each hope against each fear (10x10),
+		// each hope against each model (10x10), each model against each fear translation (10x10)
+		for (size_t j = 0; j < m_n; ++j) {
+			size_t indexModel_j = j;
+			size_t indexHope_j = j + m_n;		// e_ij'
+			size_t indexFear_j = j + 2*m_n;	// e_ij

-	    float tmp = losses[batch][analyseSentence] - oracleScores.InnerProduct(weights);
-	    if(tmp > maxTranslation)
-		maxTranslation = tmp;	
-			
-            //calculate max. for criterion
-            /*
- 	    float sumWeightedFeatures = 0.0;
-            for(unsigned score = 0; score < scores[analyseSentence]->size(); score++) {
-              sumWeightedFeatures += oracleScores[score]*newWeights[score];
-            }
+			for (size_t k = 0; k < m_n; ++k) {
+				size_t indexModel_k = k;
+				size_t indexHope_k = k + m_n;		// e_ij'
+				size_t indexFear_k = k + 2*m_n;	// e_ij

-	    if((losses[analyseSentence] - sumWeightedFeatures) > maxTranslation_) {
-              maxTranslation_ = losses[analyseSentence] - sumWeightedFeatures;
-	    } 
-	    */
-          }
-	  oldWeights.MinusEquals(weights);
-	  float criterion = 0.5*oldWeights.InnerProduct(oldWeights) + 0.01*maxTranslation;
-	  cout << "criterion: " << criterion << endl;
+				// Hypothesis pair hope/fear
+				// Compute delta:
+				cout << "\nComparing hope/fear (" << indexHope_j << "," << indexFear_k << ")" << endl;
+				ScoreComponentCollection featureValueDiffs;
+				float delta = computeDelta(currWeights, featureValues[batch], indexHope_j, indexFear_k, losses[batch], alphas, featureValueDiffs);
+
+				// update weight vector:
+				if (delta != 0) {
+					update(currWeights, featureValueDiffs, delta);
+					++numberOfUpdates;
+				}
+
+				// Hypothesis pair hope/model
+				// Compute delta:
+				cout << "\nComparing hope/model (" << indexHope_j << "," << indexModel_k << ")" << endl;
+				featureValueDiffs.ZeroAll();
+				delta = computeDelta(currWeights, featureValues[batch], indexHope_j, indexModel_k, losses[batch], alphas, featureValueDiffs);
+
+				// update weight vector:
+				if (delta != 0) {
+					update(currWeights, featureValueDiffs, delta);
+					++numberOfUpdates;
+				}
+
+				// Hypothesis pair model/fear
+				// Compute delta:
+				cout << "\nComparing model/fear (" << indexModel_j << "," << indexFear_k << ")" << endl;
+				featureValueDiffs.ZeroAll();
+				delta = computeDelta(currWeights, featureValues[batch], indexModel_j, indexFear_k, losses[batch], alphas, featureValueDiffs);
+
+				// update weight vector:
+				if (delta != 0) {
+					update(currWeights, featureValueDiffs, delta);
+					++numberOfUpdates;
+				}
+			}
+
+			cout << endl;
+		}
 	}
-  }
+
+	cout << "Number of updates: " << numberOfUpdates << endl;
+}
+
+/*
+ * Compute delta for weight update.
+ * As part of this compute feature value differences
+ * Dh_ij - Dh_ij' ---> h(e_ij') - h(e_ij)) --> h(hope) - h(fear)
+ * which are used in the delta term and in the weight update term.
+ */
+float MiraOptimiser::computeDelta(ScoreComponentCollection& currWeights,
+		const vector< ScoreComponentCollection>& featureValues,
+		const size_t indexHope,
+		const size_t indexFear,
+		const vector< float>& losses,
+		vector< float>& alphas,
+		ScoreComponentCollection& featureValueDiffs) {
+
+	const ScoreComponentCollection featureValuesHope = featureValues[indexHope];		// hypothesis j'
+	const ScoreComponentCollection featureValuesFear = featureValues[indexFear];		// hypothesis j
+
+	// compute delta
+	float delta = 0.0;
+	float diffOfModelScores = 0.0;		// (Dh_ij - Dh_ij') * w' --->  (h(e_ij') - h(e_ij))) * w' (inner product)
+	float squaredNorm = 0.0;			// ||Dh_ij - Dh_ij'||^2  --->  sum over squares of elements of h(e_ij') - h(e_ij)
+
+	featureValueDiffs = featureValuesHope;
+	featureValueDiffs.MinusEquals(featureValuesFear);
+	cout << "feature value diffs: " << featureValueDiffs << endl;
+	squaredNorm = featureValueDiffs.InnerProduct(featureValueDiffs);
+	diffOfModelScores = featureValueDiffs.InnerProduct(currWeights);
+
+	if (squaredNorm == 0.0) {
+		delta = 0.0;
+	}
+	else {
+		// loss difference used to compute delta: (l_ij - l_ij')  --->  B(e_ij') - B(e_ij)
+		// TODO: simplify and use BLEU scores of hypotheses directly?
+		float lossDiff = losses[indexFear] - losses[indexHope];
+		delta = (lossDiff - diffOfModelScores) / squaredNorm;
+		cout << "delta: " << delta << endl;
+		cout << "loss diff - model diff: " << lossDiff << " - " << diffOfModelScores << endl;
+
+		// clipping
+		switch (m_clippingScheme) {
+		case 1:
+			if (delta > m_upperBound) {
+				cout << "clipping " << delta << " to " << m_upperBound << endl;
+				delta = m_upperBound;
+			}
+			else if (delta < m_lowerBound) {
+				cout << "clipping " << delta << " to " << m_lowerBound << endl;
+				delta = m_lowerBound;
+			}
+
+			// TODO: update
+			//m_lowerBound += delta;
+			//m_upperBound -= delta;
+			//cout << "m_lowerBound = " << m_lowerBound << endl;
+			//cout << "m_upperBound = " << m_upperBound << endl;
+
+			break;
+		case 2:
+			// fear translation: e_ij  --> alpha_ij  = alpha_ij  + delta
+			// hope translation: e_ij' --> alpha_ij' = alpha_ij' - delta
+			// clipping interval: [-alpha_ij, alpha_ij']
+			// clip delta
+			cout << "Interval [" << (-1 * alphas[indexFear]) << "," << alphas[indexHope] << "]" << endl;
+			if (delta > alphas[indexHope]) {
+				cout << "clipping " << delta << " to " << alphas[indexHope] << endl;
+				delta = alphas[indexHope];
+			}
+			else if (delta < (-1 * alphas[indexFear])) {
+				cout << "clipping " << delta << " to " << (-1 * alphas[indexFear]) << endl;
+				delta = (-1 * alphas[indexFear]);
+			}
+
+			// update alphas
+			alphas[indexHope] -= delta;
+			alphas[indexFear] += delta;
+			cout << "alpha[" << indexHope << "] = " << alphas[indexHope] << endl;
+			cout << "alpha[" << indexFear << "] = " << alphas[indexFear] << endl;
+			break;
+		}
+	}
+
+	return delta;
+}
+
+/*
+ * Update the weight vector according to delta and the feature value difference
+ * w' = w' + delta * (Dh_ij - Dh_ij') ---> w' = w' + delta * (h(e_ij') - h(e_ij)))
+ */
+void MiraOptimiser::update(ScoreComponentCollection& currWeights, ScoreComponentCollection& featureValueDiffs, const float delta) {
+	featureValueDiffs.MultiplyEquals(delta);
+	currWeights.PlusEquals(featureValueDiffs);
+}
 }

--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@ -54,24 +54,43 @@ namespace Mira {
                         const Moses::ScoreComponentCollection& oracleScores);
  };

-
  class MiraOptimiser : public Optimiser {
   public:
-     MiraOptimiser(float lowerBound, float upperBound) :
-       Optimiser(),
-       lowerBound_(lowerBound),
-       upperBound_(upperBound) { }
+	  MiraOptimiser() :
+		  Optimiser() { }

-     ~MiraOptimiser() {} 
+	  MiraOptimiser(size_t n, size_t clippingScheme, float lowerBound, float upperBound) :
+		  Optimiser(),
+		  m_n(n),
+		  m_clippingScheme(clippingScheme),
+		  m_lowerBound(lowerBound),
+		  m_upperBound(upperBound) { }
+
+     ~MiraOptimiser() {}
   
      virtual void updateWeights(Moses::ScoreComponentCollection& weights,
-                         const std::vector< std::vector<Moses::ScoreComponentCollection> >& scores,
-                         const std::vector< std::vector<float> >& losses,
-                         const Moses::ScoreComponentCollection& oracleScores);
+      						  const std::vector< std::vector<Moses::ScoreComponentCollection> >& scores,
+      						  const std::vector< std::vector<float> >& losses,
+      						  const Moses::ScoreComponentCollection& oracleScores);
+      float computeDelta(Moses::ScoreComponentCollection& currWeights,
+      				const std::vector< Moses::ScoreComponentCollection>& featureValues,
+      				const size_t indexHope,
+      				const size_t indexFear,
+      				const std::vector< float>& losses,
+      				std::vector< float>& alphas,
+      				Moses::ScoreComponentCollection& featureValueDiffs);
+      void update(Moses::ScoreComponentCollection& currWeights, Moses::ScoreComponentCollection& featureValueDiffs, const float delta);
  
   private:
-     float lowerBound_;
-     float upperBound_;
+      // number of hypotheses used for each nbest list (number of hope, fear, best model translations)
+      size_t m_n;
+
+      // clipping scheme for weight updates
+      // 1: equal, 2: varied
+      size_t m_clippingScheme;
+
+      float m_lowerBound;
+      float m_upperBound;
  };
 }

--- a/mira/Perceptron.cpp
+++ b/mira/Perceptron.cpp
@ -25,19 +25,18 @@ using namespace std;
 namespace Mira {

 void Perceptron::updateWeights(ScoreComponentCollection& currWeights,
-                   const vector< vector<ScoreComponentCollection> >& scores,
-                   const vector<vector<float> >& losses,
-                   const ScoreComponentCollection& oracleScores)
+		const vector< vector<ScoreComponentCollection> >& scores,
+		const vector<vector<float> >& losses,
+		const ScoreComponentCollection& oracleScores)
 {
-  for (size_t i = 0; i < scores.size(); ++i) {
-    for (size_t j = 0; j < scores[i].size(); ++j) {
-      if (losses[i][j] > 0) {
-        currWeights.MinusEquals(scores[i][j]);
-        currWeights.PlusEquals(oracleScores);      
-      }
-    }
-  }
-}
-
+	for (size_t i = 0; i < scores.size(); ++i) {
+		for (size_t j = 0; j < scores[i].size(); ++j) {
+			if (losses[i][j] > 0) {
+				currWeights.MinusEquals(scores[i][j]);
+				currWeights.PlusEquals(oracleScores);
+			}
+		}
+	}
+}
 }