Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-26 13:23:25 +03:00)
code clean-up, step 1
git-svn-id: http://svn.statmt.org/repository/mira@3918 cc96ff50-19ce-11e0-b349-13d7f0bd23df
Parent: 8e6c963041
Commit: 120be1df4f
@@ -32,8 +32,6 @@ using namespace Moses;

namespace Mira {

//Decoder::~Decoder() {}

/**
 * Allocates a char* and copies string into it.
**/
@@ -70,8 +68,8 @@ namespace Mira {

MosesDecoder::MosesDecoder(bool scaleByInputLength, float historySmoothing)
: m_manager(NULL) {
// force initialisation of the phrase dictionary (TODO: what for?)
const StaticData &staticData = StaticData::Instance();
// force initialisation of the phrase dictionary (TODO: why?)
const StaticData &staticData = StaticData::Instance();
m_sentence = new Sentence(Input);
stringstream in("Initialising decoder..\n");
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
@@ -176,61 +174,6 @@ namespace Mira {
return best;
}

vector<float> MosesDecoder::getBleuAndScore(const std::string& source,
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct,
size_t rank,
size_t epoch)
{
StaticData &staticData = StaticData::InstanceNonConst();

m_sentence = new Sentence(Input);
stringstream in(source + "\n");
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
m_sentence->Read(in,inputFactorOrder);
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);

// set the weight for the bleu feature
ostringstream bleuWeightStr;
bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
PARAM_VEC bleuWeight(1,bleuWeightStr.str());

staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
staticData.ReLoadBleuScoreFeatureParameter();

m_bleuScoreFeature->SetCurrentSourceLength((*m_sentence).GetSize());
m_bleuScoreFeature->SetCurrentReference(sentenceid);

// run the decoder
m_manager = new Moses::Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system);
m_manager->ProcessSentence();
TrellisPathList sentences;
m_manager->CalcNBest(1, sentences, distinct);

// read off the feature values and bleu scores for each sentence in the nbest list
Moses::TrellisPathList::const_iterator iter = sentences.begin();
vector<float> bleuAndScore;
const Moses::TrellisPath &path = **iter;
float bleuScore = getBleuScore(path.GetScoreBreakdown());
float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
bleuAndScore.push_back(bleuScore);
bleuAndScore.push_back(scoreWithoutBleu);

VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", 1best translation: ");
Phrase phrase = path.GetTargetPhrase();
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
const Word &word = phrase.GetWord(pos);
Word *newWord = new Word(word);
VERBOSE(1, *newWord);
}

VERBOSE(1, endl);

return bleuAndScore;
}
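Note: getBleuAndScore() returns two numbers for the 1-best hypothesis: its BLEU score and its model score with the weighted BLEU contribution subtracted back out. A minimal sketch of just that bookkeeping, with illustrative names that are not part of the Moses API:

```cpp
#include <vector>

// Sketch: split a total score that already contains a weighted BLEU
// component into (bleuScore, scoreWithoutBleu), mirroring the
// subtraction performed in getBleuAndScore() above.
std::vector<float> splitBleuAndScore(float totalScore, float bleuScore,
                                     float bleuObjectiveWeight,
                                     float bleuScoreWeight) {
  std::vector<float> result;
  result.push_back(bleuScore);
  result.push_back(totalScore - bleuObjectiveWeight * bleuScoreWeight * bleuScore);
  return result;
}
```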

size_t MosesDecoder::getCurrentInputLength() {
return (*m_sentence).GetSize();
}

@@ -270,27 +213,5 @@ namespace Mira {
void MosesDecoder::printReferenceLength(const vector<size_t>& ref_ids) {
m_bleuScoreFeature->PrintReferenceLength(ref_ids);
}

vector<float> MosesDecoder::calculateBleuOfCorpus(const vector< vector< const Word*> >& words, vector<size_t>& ref_ids, size_t epoch, size_t rank) {
vector<float> bleu = m_bleuScoreFeature->CalculateBleuOfCorpus(words, ref_ids);
if (bleu.size() > 0) {
cerr << "\nRank " << rank << ", BLEU after epoch " << epoch << ": " << bleu[4]*100 << ", "
<< bleu[0]*100 << "/" << bleu[1]*100 << "/" << bleu[2]*100 << "/" << bleu[3]*100 << " "
<< "(BP=" << bleu[5] << ", " << "ratio=" << bleu[6] << ", "
<< "hyp_len=" << bleu[7] << ", ref_len=" << bleu[8] << ")" << endl;
vector<float> bleuAndRatio(2);
bleuAndRatio[0] = bleu[4]*100;
bleuAndRatio[1] = bleu[6];
return bleuAndRatio;
}
else {
cerr << "\nRank " << rank << ", BLEU after epoch " << epoch << ": 0" << endl;
vector<float> bleuAndRatio(2);
bleuAndRatio[0] = 0;
bleuAndRatio[1] = 0;
return bleuAndRatio;
}
}

}
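Note: the index layout of the vector returned by BleuScoreFeature::CalculateBleuOfCorpus() can be read off from the print statement above and from the push_back sequence at the end of that function (later in this commit):

```cpp
// bleu[0..3]  modified 1- to 4-gram precisions
// bleu[4]     corpus BLEU (geometric mean of precisions times BP)
// bleu[5]     brevity penalty (BP)
// bleu[6]     hypothesis/reference length ratio
// bleu[7]     hypothesis length
// bleu[8]     reference length
```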
@@ -64,20 +64,12 @@ class MosesDecoder {
bool distinct,
size_t rank,
size_t epoch);
std::vector<float> getBleuAndScore(const std::string& source,
size_t sentenceid,
float bleuObjectiveWeight,
float bleuScoreWeight,
bool distinct,
size_t rank,
size_t epoch);
size_t getCurrentInputLength();
void updateHistory(const std::vector<const Moses::Word*>& words);
void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
void printBleuFeatureHistory(std::ostream& out);
void printReferenceLength(const std::vector<size_t>& ref_ids);
std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
Moses::ScoreComponentCollection getWeights();
void setWeights(const Moses::ScoreComponentCollection& weights);
void cleanup();
@@ -5,187 +5,6 @@ using namespace std;

namespace Mira {

vector<FValue> Hildreth::optimise (const vector<FVector>& a, const vector<FValue>& b) {

size_t i;
int max_iter = 10000;
float eps = 0.00000001;
float zero = 0.000000000001;

vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );

float max_kkt = -1e100;

size_t K = b.size();

float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product(a[i]);
is_computed[i] = false;
}

int max_kkt_i = -1;

for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}

int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;

while ( max_kkt >= eps && iter < max_iter )
{

diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;

if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else
add_alpha = diff_alpha;

alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;

if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}

for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if ( alpha[i] > zero )
kkt[i] = abs ( F[i] );
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}

iter++;
}

return alpha;
}

vector<FValue> Hildreth::optimise (const vector<FVector>& a, const vector<FValue>& b, FValue C) {

size_t i;
int max_iter = 10000;
FValue eps = 0.00000001;
FValue zero = 0.000000000001;

vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );

float max_kkt = -1e100;

size_t K = b.size();

float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product(a[i]);
is_computed[i] = false;
}

int max_kkt_i = -1;

for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}

int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;

while ( max_kkt >= eps && iter < max_iter )
{

diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;

if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else if (try_alpha > C)
add_alpha = C - alpha[max_kkt_i];
else
add_alpha = diff_alpha;

alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;

if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}

for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if (alpha[i] > C - zero)
kkt[i]=-kkt[i];
else if (alpha[i] > zero)
kkt[i] = abs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}

iter++;
}

return alpha;
}
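Note: reading off the loop structure (an interpretation inferred from the code, not stated in the commit), both overloads run Hildreth's coordinate ascent on the dual of the quadratic program min ½||w||² subject to a_i · w >= b_i:

```latex
\max_{\alpha \ge 0}\ \sum_i \alpha_i b_i - \tfrac{1}{2}\Big\lVert \sum_i \alpha_i a_i \Big\rVert^2,
\qquad
F_i = b_i - \sum_j \alpha_j \,(a_i \cdot a_j)
```

At each iteration the coordinate with the largest KKT violation is moved by F_i / (a_i · a_i) and projected back to alpha_i >= 0 (and alpha_i <= C in the capped overload); the corresponding primal update is w = sum_i alpha_i a_i.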

vector<FValue> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<FValue>& b) {

size_t i;

@@ -5,8 +5,6 @@ namespace Mira {

class Hildreth {
public :
static std::vector<Moses::FValue> optimise (const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b );
static std::vector<Moses::FValue> optimise (const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b, Moses::FValue C);
static std::vector<Moses::FValue> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<Moses::FValue>& b );
static std::vector<Moses::FValue> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<Moses::FValue>& b, Moses::FValue C);
};
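Note: for experimenting outside Moses, here is a self-contained sketch of the same procedure over plain std::vector (dense dot products instead of FVector, no cap C). It is simplified from Hildreth.cpp above, not a drop-in replacement:

```cpp
#include <cmath>
#include <vector>

// Hedged sketch of Hildreth's method: coordinate ascent on the dual of
//   min 1/2 ||w||^2  s.t.  a[i] . w >= b[i],  with alpha[i] >= 0.
static float dot(const std::vector<float>& x, const std::vector<float>& y) {
  float s = 0;
  for (size_t i = 0; i < x.size(); ++i) s += x[i] * y[i];
  return s;
}

std::vector<float> hildreth(const std::vector<std::vector<float> >& a,
                            const std::vector<float>& b) {
  const float eps = 1e-8f;
  const size_t K = b.size();
  std::vector<float> alpha(K, 0.0f), F(b);   // F[i] = b[i] - sum_j alpha[j] (a_i . a_j)
  for (int iter = 0; iter < 10000; ++iter) {
    // KKT value: F[i] while alpha[i] sits at its bound, |F[i]| otherwise.
    size_t best = 0;
    float max_kkt = -1e30f;
    for (size_t i = 0; i < K; ++i) {
      float kkt = alpha[i] > 0 ? std::fabs(F[i]) : F[i];
      if (kkt > max_kkt) { max_kkt = kkt; best = i; }
    }
    if (max_kkt < eps) break;                         // all constraints satisfied
    float aa = dot(a[best], a[best]);
    float step = aa <= 0 ? 0.0f : F[best] / aa;       // unconstrained coordinate step
    if (alpha[best] + step < 0) step = -alpha[best];  // project back to alpha >= 0
    alpha[best] += step;
    for (size_t i = 0; i < K; ++i) F[i] -= step * dot(a[i], a[best]);
  }
  return alpha;
}
```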
@@ -166,7 +166,6 @@ int main(int argc, char** argv) {
string decoder_settings;
float min_weight_change;
float decrease_learning_rate;
bool devBleu;
bool normaliseWeights;
bool print_feature_values;
bool historyOf1best;
@@ -178,7 +177,6 @@ int main(int argc, char** argv) {
float bleuScoreWeight;
float margin_slack;
float margin_slack_incr;
bool analytical_update;
bool perceptron_update;
bool hope_fear;
bool model_hope_fear;
@@ -189,7 +187,6 @@ int main(int argc, char** argv) {
desc.add_options()
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
("adapt-after-epoch", po::value<size_t>(&adapt_after_epoch)->default_value(0), "Index of epoch after which adaptive parameters will be adapted")
("analytical-update", po::value<bool>(&analytical_update)->default_value(0), "Use 1-best lists and compute the update analytically")
("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for log-ing feature values")
("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is sent to the optimiser for weight adjustments")
@@ -201,9 +198,7 @@
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0), "Decrease learning rate by the given value after every epoch")
("dev-bleu", po::value<bool>(&devBleu)->default_value(true), "Compute BLEU score of oracle translations of the whole tuning set")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
@@ -214,12 +209,12 @@
("hope-n", po::value<int>(&hope_n)->default_value(-1), "Number of hope translations used")
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
("margin-slack", po::value<float>(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints")
("margin-incr", po::value<float>(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("log-feature-values", po::value<bool>(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.")
("margin-incr", po::value<float>(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount")
("margin-slack", po::value<float>(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints")
("min-learning-rate", po::value<float>(&min_learning_rate)->default_value(0), "Set a minimum learning rate")
("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(5), "How often per epoch to mix weights, when using mpi")
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimization")
("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in nbest list")
@@ -229,6 +224,8 @@
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
("scale-update", po::value<bool>(&scale_update)->default_value(false), "Scale the update by the Bleu score of the oracle translation")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(true), "Use a sentence-level BLEU scoring function")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
@@ -236,8 +233,7 @@
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
("scale-update", po::value<bool>(&scale_update)->default_value(false), "Scale the update by the Bleu score of the oracle translation")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");

po::options_description cmdline_options;
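Note: these options use boost::program_options. A minimal, self-contained sketch of the declare/parse/notify cycle such an options_description relies on (the surrounding parsing code in mira's main() is not part of this diff):

```cpp
#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

int main(int argc, char** argv) {
  std::string learner;
  po::options_description desc("Allowed options");
  desc.add_options()
      ("help", "Print this help message and exit")
      ("learner,l", po::value<std::string>(&learner)->default_value("mira"),
       "Learning algorithm");

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, desc), vm);
  po::notify(vm);  // copies parsed values into the bound variables

  if (vm.count("help")) { std::cout << desc << std::endl; return 0; }
  std::cout << "learner = " << learner << std::endl;
  return 0;
}
```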

@@ -355,42 +351,31 @@ int main(int argc, char** argv) {
perceptron_update = true;
model_hope_fear = false; // mira only
hope_fear = false; // mira only
analytical_update = false; // mira only
} else {
cerr << "Error: Unknown optimiser: " << learner << endl;
return 1;
}

// resolve parameter dependencies
if (perceptron_update || analytical_update) {
if (batchSize > 1 && perceptron_update) {
batchSize = 1;
cerr << "Info: Setting batch size to 1 for perceptron/analytical update" << endl;
cerr << "Info: Setting batch size to 1 for perceptron update" << endl;
}

if (hope_n == -1 && fear_n == -1) {
hope_n = n;
fear_n = n;
}

if ((model_hope_fear || analytical_update) && hope_fear) {
if (model_hope_fear && hope_fear) {
hope_fear = false; // is true by default
}

if (!hope_fear && !analytical_update) {
if (!hope_fear) {
model_hope_fear = true;
}

if (model_hope_fear && analytical_update) {
cerr << "Error: Must choose between model-hope-fear and analytical update" << endl;
return 1;
}

if (!sentenceLevelBleu) {
if (!historyOf1best && !historyOfOracles) {
historyOf1best = true;
}
}

if (burnIn && sentenceLevelBleu) {
burnIn = false;
cerr << "Info: Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl;
@@ -545,7 +530,6 @@ int main(int argc, char** argv) {
int sumStillViolatedConstraints_lastEpoch = 0;
int sumConstraintChangeAbs;
int sumConstraintChangeAbs_lastEpoch = 0;
// size_t sumBleuChangeAbs;
float *sendbuf, *recvbuf;
sendbuf = (float *) malloc(sizeof(float));
recvbuf = (float *) malloc(sizeof(float));
@@ -553,7 +537,6 @@ int main(int argc, char** argv) {
// sum of violated constraints
sumStillViolatedConstraints = 0;
sumConstraintChangeAbs = 0;
// sumBleuChangeAbs = 0;

numberOfUpdatesThisEpoch = 0;
// Sum up weights over one epoch, final average uses weights from last epoch
@@ -619,7 +602,7 @@ int main(int argc, char** argv) {
dummyFeatureValues.push_back(newFeatureValues);
dummyBleuScores.push_back(newBleuScores);

if (perceptron_update || analytical_update) {
if (perceptron_update) {
if (historyOf1best) {
// MODEL (for updating the history)
cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl;
@@ -778,15 +761,6 @@ int main(int argc, char** argv) {
}
}

/* // get 1best model results with old weights
vector< vector <float > > bestModelOld_batch;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
bestModelOld_batch.push_back(bestModelOld);
decoder->cleanup();
}*/

// optionally print out the feature values
if (print_feature_values) {
cerr << "\nRank " << rank << ", epoch " << epoch << ", feature values: " << endl;
@@ -823,14 +797,9 @@ int main(int argc, char** argv) {
vector<vector<float> > dummy1;
vector<size_t> dummy2;
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
featureValuesHope, featureValuesFear, dummy1, dummy1, dummy2,
featureValuesHope, featureValuesFear, dummy1, dummy1, dummy2,
learning_rate, rank, epoch);
}
else if (analytical_update) {
update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(mosesWeights,
featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], bleuScoresFear[0][0],
ref_ids[0], learning_rate, rank, epoch);
}
else {
if (hope_fear) {
if (coreWeightMap.size() > 0) {
@@ -859,7 +828,7 @@ int main(int argc, char** argv) {
}

update_status = optimiser->updateWeightsHopeFear(mosesWeights,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, ref_ids,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, ref_ids,
learning_rate, rank, epoch);
}
else {
@@ -900,17 +869,6 @@ int main(int argc, char** argv) {
weightDifference.MinusEquals(oldWeights);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl);

/* // get 1best model results with new weights (for each sentence in batch)
vector<float> bestModelNew;
for (size_t i = 0; i < actualBatchSize; ++i) {
string& input = inputSentences[*current_sid_start + i];
bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
decoder->cleanup();
sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]);
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl);
VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl);
}*/

// update history (for approximate document Bleu)
if (sentenceLevelBleu) {
for (size_t i = 0; i < oracles.size(); ++i) {

@@ -110,7 +110,7 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
ScoreComponentCollection update(featureValueDiffs[k]);
update.MultiplyEquals(alpha);

// sum up update
// sum updates
summedUpdate.PlusEquals(update);
}
}
@@ -122,24 +122,6 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
return status;
}

ScoreComponentCollection newWeights(currWeights);
newWeights.PlusEquals(summedUpdate);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

// apply learning rate
if (learning_rate != 1) {
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
@@ -158,6 +140,21 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
currWeights.PlusEquals(summedUpdate);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

vector<int> status(2);
status[0] = violatedConstraintsBefore;
status[1] = violatedConstraintsAfter;
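Note: in symbols, the sanity check above counts constraint i as still violated when (notation inferred from the code: l_i the loss, Δf_i the feature-value difference, s = m_margin_slack, ε the tolerance epsilon):

```latex
\text{violated}_i \;\Longleftrightarrow\; \ell_i - \big(\mathbf{w}\cdot\Delta\mathbf{f}_i + s\big) > \varepsilon
```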
@@ -291,25 +288,7 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
return status;
}

ScoreComponentCollection newWeights(currWeights);
newWeights.PlusEquals(summedUpdate);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

// Apply learning rate (fixed or flexible)
// apply learning rate
if (learning_rate != 1) {
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
summedUpdate.MultiplyEquals(learning_rate);
@@ -321,107 +300,27 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
currWeights.PlusEquals(summedUpdate);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

vector<int> statusPlus(2);
statusPlus[0] = violatedConstraintsBefore;
statusPlus[1] = violatedConstraintsAfter;
return statusPlus;
}

vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& currWeights,
ScoreComponentCollection& featureValuesHope,
ScoreComponentCollection& featureValuesFear,
float bleuScoreHope,
float bleuScoreFear,
size_t sentenceId,
float learning_rate,
size_t rank,
size_t epoch) {

float epsilon = 0.0001;
float oldDistanceFromOptimum = 0;
bool constraintViolatedBefore = false;
ScoreComponentCollection weightUpdate;

// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
ScoreComponentCollection featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float loss = bleuScoreHope - bleuScoreFear;
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;

if (diff > epsilon) {
// constraint violated
oldDistanceFromOptimum += diff;
constraintViolatedBefore = true;

// compute alpha for given constraint: (loss - model score diff) / || feature value diff ||^2
// featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm() == featureValueDiff.InnerProduct(featureValueDiff)
// from Crammer&Singer 2006: alpha = min {C , l_t/ ||x||^2}
float squaredNorm = featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm();

if (squaredNorm > 0) {
float alpha = diff / squaredNorm;
if (m_slack > 0 ) {
if (alpha > m_slack) {
alpha = m_slack;
}
else if (alpha < m_slack*(-1)) {
alpha = m_slack*(-1);
}
}

cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl;
featureValueDiff.MultiplyEquals(alpha);
weightUpdate.PlusEquals(featureValueDiff);
}
else {
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0" << endl);
}
}

if (!constraintViolatedBefore) {
// constraint satisfied, nothing to do
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl;
vector<int> status(2);
status[0] = 0;
status[1] = 0;
return status;
}

// sanity check: constraint still violated after optimisation?
ScoreComponentCollection newWeights(currWeights);
newWeights.PlusEquals(weightUpdate);
bool constraintViolatedAfter = false;
float newDistanceFromOptimum = 0;
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
modelScoreDiff = featureValueDiff.InnerProduct(newWeights);
diff = loss - (modelScoreDiff + m_margin_slack);
// approximate comparison between floats!
if (diff > epsilon) {
constraintViolatedAfter = true;
newDistanceFromOptimum += (loss - modelScoreDiff);
}

VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);

// apply update to weight vector
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
currWeights.PlusEquals(weightUpdate);
VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);

vector<int> status(2);
status[0] = 1;
status[1] = constraintViolatedAfter ? 1 : 0;
return status;
}

}
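Note: matching the comments in updateWeightsAnalytically() and the margin slack read off from the code, the single-constraint step is

```latex
\alpha = \operatorname{clip}_{[-C,\,C]}\!\left(\frac{\ell - (\mathbf{w}\cdot\Delta\mathbf{f} + s)}{\lVert\Delta\mathbf{f}\rVert^{2}}\right),
\qquad
\mathbf{w} \leftarrow \mathbf{w} + \alpha\,\Delta\mathbf{f}
```

with Δf = f(hope) - f(fear), l = BLEU(hope) - BLEU(fear), s = m_margin_slack and C = m_slack (no clipping when m_slack <= 0); this is the alpha = min{C, l_t / ||x||^2} rule cited from Crammer & Singer 2006.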

@@ -67,15 +67,6 @@ namespace Mira {
m_scale_update(scale_update),
m_margin_slack(margin_slack) { }

std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& featureValuesHope,
Moses::ScoreComponentCollection& featureValuesFear,
float bleuScoresHope,
float bleuScoresFear,
size_t sentenceId,
float learning_rate,
size_t rank,
size_t epoch);
std::vector<int> updateWeights(Moses::ScoreComponentCollection& currWeights,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector<std::vector<float> >& losses,
@@ -117,6 +108,7 @@ namespace Mira {
// scale update with log 10 of oracle BLEU score
bool m_scale_update;

// slack when comparing losses to model scores
float m_margin_slack;
};
}

@@ -45,7 +45,6 @@ vector<int> Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeig
vector<int> update_status;
update_status.push_back(0);
update_status.push_back(0);
update_status.push_back(0);
return update_status;
}

@@ -94,14 +94,12 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
Phrase ngram(Output);
//cerr << "start: " << end_idx-order << " end: " << end_idx << endl;
for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) {
const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]);
Word w;
w.SetFactor(0, f);
ngram.AddWord(w);
}
//cerr << "Ref: " << ngram << endl;
ref_pair.second[ngram] += 1;
}
}
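Note: the nested loops above enumerate every reference n-gram up to BleuScoreState::bleu_order and count its occurrences. A standalone sketch of the same counting with plain strings standing in for Moses Phrase/Word objects (illustrative only):

```cpp
#include <map>
#include <string>
#include <vector>

typedef std::map<std::string, size_t> NGramCounts;

// Count all n-grams of order 1..maxOrder in a tokenised reference,
// the way LoadReferences() fills ref_pair.second above.
NGramCounts countRefNgrams(const std::vector<std::string>& refTokens,
                           size_t maxOrder) {
  NGramCounts counts;
  for (size_t order = 1; order <= maxOrder; ++order) {
    for (size_t end = order; end <= refTokens.size(); ++end) {
      std::string ngram;
      for (size_t i = end - order; i < end; ++i) {
        if (i > end - order) ngram += " ";
        ngram += refTokens[i];
      }
      counts[ngram] += 1;  // ref_pair.second[ngram] += 1 in the original
    }
  }
  return counts;
}
```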
@@ -120,10 +118,10 @@ void BleuScoreFeature::SetCurrentReference(size_t ref_id) {
}

/*
 * Update the pseudo-document big_O after each translation of a source sentence.
 * (big_O is an exponentially-weighted moving average of vectors c(e;{r_k}))
 * big_O = 0.9 * (big_O + c(e_oracle))
 * big_O_f = 0.9 * (big_O_f + |f|) input length of document big_O
 * Update the pseudo-document O after each translation of a source sentence.
 * (O is an exponentially-weighted moving average of vectors c(e;{r_k}))
 * O = m_historySmoothing * (O + c(e_oracle))
 * O_f = m_historySmoothing * (O_f + |f|) input length of pseudo-document
 */
void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
Phrase phrase(Output, hypo);
@@ -138,7 +136,6 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
m_count_history[i] = m_historySmoothing * (m_count_history[i] + ngram_counts[i]);
m_match_history[i] = m_historySmoothing * (m_match_history[i] + ngram_matches[i]);
//cerr << "precisionHistory " << i + 1 << ": " << (m_match_history[i]/m_count_history[i]) << " (" << m_match_history[i] << "/" << m_count_history[i] << ")" << endl;
}

// update counts for reference and target length
@@ -148,7 +145,7 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
}
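Note: a tiny standalone illustration of the smoothing recurrence documented above, O = m_historySmoothing * (O + c); the counts are made up:

```cpp
#include <cstdio>

int main() {
  float smoothing = 0.9f;                  // m_historySmoothing
  float history = 0.0f;                    // one slot of m_count_history
  float counts[3] = { 10.f, 12.f, 8.f };   // hypothetical per-sentence counts
  for (int i = 0; i < 3; ++i) {
    history = smoothing * (history + counts[i]);  // EWMA update
    std::printf("after sentence %d: %.3f\n", i + 1, history);
  }
  return 0;
}
```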

/*
 * Update history with a batch of oracle translations
 * Update history with a batch of translations
 */
void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypos, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch) {
for (size_t batchPosition = 0; batchPosition < hypos.size(); ++batchPosition){
@@ -195,7 +192,7 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
}

/*
 * Update history with a batch of oracle translations
 * Print batch of reference translations
 */
void BleuScoreFeature::PrintReferenceLength(const vector<size_t>& ref_ids) {
for (size_t batchPosition = 0; batchPosition < ref_ids.size(); ++batchPosition){
@@ -325,7 +322,6 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
}

new_state->m_source_length = cur_hypo.GetWordsBitmap().GetSize();
new_state->m_source_phrase_length = cur_hypo.GetCurrSourceWordsRange().GetNumWordsCovered(); // todo: delete
new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx,
ctx_end_idx));
new_state->m_target_length += cur_hypo.GetTargetPhrase().GetSize();
@@ -337,7 +333,6 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,

// Calculate new bleu.
new_bleu = CalculateBleu(new_state);
//cerr << "NS: " << *new_state << " NB " << new_bleu << endl;

// Set score to new Bleu score
accumulator->PlusEquals(this, new_bleu - old_bleu);
@@ -396,82 +391,6 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
return precision;
}

vector<float> BleuScoreFeature::CalculateBleuOfCorpus(const vector< vector< const Word* > >& oracles, const vector<size_t>& ref_ids) {
// get ngram matches and counts for all oracle sentences and their references
vector<size_t> sumOfClippedNgramMatches(BleuScoreState::bleu_order);
vector<size_t> sumOfNgramCounts(BleuScoreState::bleu_order);
size_t ref_length = 0;
size_t target_length = 0;

for (size_t batchPosition = 0; batchPosition < oracles.size(); ++batchPosition){
Phrase phrase(Output, oracles[batchPosition]);
size_t ref_id = ref_ids[batchPosition];
size_t cur_ref_length = m_refs[ref_id].first;
NGrams cur_ref_ngrams = m_refs[ref_id].second;

ref_length += cur_ref_length;
target_length += oracles[batchPosition].size();

std::vector< size_t > ngram_counts(BleuScoreState::bleu_order);
std::vector< size_t > clipped_ngram_matches(BleuScoreState::bleu_order);
GetClippedNgramMatchesAndCounts(phrase, cur_ref_ngrams, ngram_counts, clipped_ngram_matches, 0);

// add clipped ngram matches and ngram counts to corpus sums
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
sumOfClippedNgramMatches[i] += clipped_ngram_matches[i];
sumOfNgramCounts[i] += ngram_counts[i];
}
}

if (!sumOfNgramCounts[0]) {
vector<float> empty(0);
return empty;
}
if (!sumOfClippedNgramMatches[0]) {
vector<float> empty(0);
return empty; // if we have no unigram matches, score should be 0
}

// calculate bleu score
float precision = 1.0;

vector<float> bleu;
// Calculate geometric mean of modified ngram precisions
// BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
//      = BP * 4th root(PRODUCT_1_4 p_n)
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
if (sumOfNgramCounts[i]) {
precision *= 1.0*sumOfClippedNgramMatches[i] / sumOfNgramCounts[i];
bleu.push_back(1.0*sumOfClippedNgramMatches[i] / sumOfNgramCounts[i]);
}
}

// take geometric mean
precision = pow(precision, (float)1/4);

// Apply brevity penalty if applicable.
// BP = 1 if c > r
// BP = e^(1 - r/c) if c <= r
// where
// c: length of the candidate translation
// r: effective reference length (sum of best match lengths for each candidate sentence)
float BP;
if (target_length < ref_length) {
precision *= exp(1 - (1.0*ref_length/target_length));
BP = exp(1 - (1.0*ref_length/target_length));
}
else {
BP = 1.0;
}

bleu.push_back(precision);
bleu.push_back(BP);
bleu.push_back(1.0*target_length/ref_length);
bleu.push_back(target_length);
bleu.push_back(ref_length);
return bleu;
}
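Note: the computation above is standard corpus BLEU; in the notation of the comments (c = candidate length, r = effective reference length):

```latex
\mathrm{BLEU} = \mathrm{BP}\cdot\exp\Big(\sum_{n=1}^{4}\tfrac{1}{4}\log p_n\Big)
              = \mathrm{BP}\cdot\Big(\prod_{n=1}^{4} p_n\Big)^{1/4},
\qquad
\mathrm{BP} = \begin{cases} 1 & c > r \\ e^{\,1-r/c} & c \le r \end{cases}
```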

const FFState* BleuScoreFeature::EmptyHypothesisState(const InputType& input) const
{
return new BleuScoreState();

@@ -29,8 +29,6 @@ private:
size_t m_source_length;
size_t m_target_length;

size_t m_source_phrase_length; // todo: delete

// scaled reference length is needed for scoring incomplete hypotheses against reference translation
float m_scaled_ref_length;

@@ -52,7 +50,7 @@ public:
m_target_length_history(0),
m_ref_length_history(0),
m_scale_by_input_length(true),
m_historySmoothing(0.9) {}
m_historySmoothing(0.7) {}

BleuScoreFeature(bool scaleByInputLength, float historySmoothing):
StatefulFeatureFunction("BleuScore"),
@@ -101,11 +99,10 @@ public:
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
float CalculateBleu(BleuScoreState*) const;
std::vector<float> CalculateBleuOfCorpus(const std::vector< std::vector< const Word* > >& hypos, const std::vector<size_t>& ref_ids);
const FFState* EmptyHypothesisState(const InputType&) const;

private:
// counts for pseudo-document big_O
// counts for pseudo-document
std::vector< float > m_count_history;
std::vector< float > m_match_history;
float m_source_length_history;
@@ -117,9 +114,10 @@ private:
NGrams m_cur_ref_ngrams;
size_t m_cur_ref_length;

// whether or not to scale the BLEU score by a history of the input size
// scale BLEU score by history of input size
bool m_scale_by_input_length;

// smoothing factor for history counts
float m_historySmoothing;
};