From 42333388b4f177c2626ca4e6dc498335b89aa280 Mon Sep 17 00:00:00 2001
From: ehasler <ehasler@cc96ff50-19ce-11e0-b349-13d7f0bd23df>
Date: Sun, 26 Jun 2011 19:12:46 +0000
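Subject: [PATCH] change verbosity for cerr messages, remove some unwanted
 options, introduce --margin-slack, --margin-incr, change defaults of
 --epochs, --history-smoothing, --mixing-frequency, --nbest,
 --sentence-level-bleu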

git-svn-id: http://svn.statmt.org/repository/mira@3913 cc96ff50-19ce-11e0-b349-13d7f0bd23df
---
 mira/Decoder.cpp       |  22 ++-
 mira/Decoder.h         |  15 +-
 mira/Main.cpp          | 409 +++++++++++++----------------------------
 mira/MiraOptimiser.cpp | 221 +++++++++-------------
 mira/Optimiser.h       |  44 ++---
 mira/Perceptron.cpp    |   4 +-
 6 files changed, 257 insertions(+), 458 deletions(-)

diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index 1181ee8be..4a7f116f1 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -68,7 +68,7 @@ namespace Mira {
     delete[] mosesargv;
   }
  
-  MosesDecoder::MosesDecoder(bool useScaledReference, bool scaleByInputLength, float historySmoothing)
+  MosesDecoder::MosesDecoder(bool scaleByInputLength, float historySmoothing)
 		: m_manager(NULL) {
 			// force initialisation of the phrase dictionary (TODO: what for?)
 			const StaticData &staticData = StaticData::Instance();
@@ -82,7 +82,7 @@ namespace Mira {
       m_manager->ProcessSentence();
 
       // Add the bleu feature
-      m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, historySmoothing);
+      m_bleuScoreFeature = new BleuScoreFeature(scaleByInputLength, historySmoothing);
       (const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
   }
   
@@ -100,7 +100,8 @@ namespace Mira {
                               vector< float>& bleuScores,
                               bool oracle,
                               bool distinct,
-                              size_t rank)
+                              size_t rank,
+                              size_t epoch)
   {
   	StaticData &staticData = StaticData::InstanceNonConst();
 
@@ -137,11 +138,11 @@ namespace Mira {
 
     	//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
     	float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
-    	cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
+    	cerr << "Rank " << rank << ", epoch " << epoch << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
 
     	Phrase bestPhrase = path.GetTargetPhrase();
 
-    	cerr << "Rank " << rank << ": ";
+    	cerr << "Rank " << rank << ", epoch " << epoch << ": ";
     	Phrase phrase = path.GetTargetPhrase();
     	for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
     		const Word &word = phrase.GetWord(pos);
@@ -179,7 +180,9 @@ namespace Mira {
                                 size_t sentenceid,
                                 float bleuObjectiveWeight,
                                 float bleuScoreWeight,
-                                bool distinct)
+                                bool distinct,
+                                size_t rank,
+                                size_t epoch)
   {
   	StaticData &staticData = StaticData::InstanceNonConst();
 
@@ -215,15 +218,15 @@ namespace Mira {
   	bleuAndScore.push_back(bleuScore);
   	bleuAndScore.push_back(scoreWithoutBleu);
 
-  	cerr << "1best translation: ";
+  	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", 1best translation: ");
   	Phrase phrase = path.GetTargetPhrase();
   	for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
   		const Word &word = phrase.GetWord(pos);
   		Word *newWord = new Word(word);
-  		cerr << *newWord;
+  		VERBOSE(1, *newWord);
   	}
 
-  	cerr << endl;
+  	VERBOSE(1, endl);
 
   	return bleuAndScore;
   }
@@ -245,7 +248,6 @@ namespace Mira {
   }
 
   void MosesDecoder::setWeights(const ScoreComponentCollection& weights) {
-    //cerr << "New weights: " << weights << endl;
     StaticData::InstanceNonConst().SetAllWeights(weights);
   }
 
diff --git a/mira/Decoder.h b/mira/Decoder.h
index 9ae14b7f3..fdb6bb357 100644
--- a/mira/Decoder.h
+++ b/mira/Decoder.h
@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel,  int argc, std::vecto
  **/
 class MosesDecoder {
   public:
-    MosesDecoder(bool useScaledReference, bool scaleByInputLength, float historySmoothing);
+    MosesDecoder(bool scaleByInputLength, float historySmoothing);
 	
     //returns the best sentence
     std::vector<const Moses::Word*> getNBest(const std::string& source,
@@ -62,12 +62,15 @@ class MosesDecoder {
                           std::vector< float>& scores,
                           bool oracle,
                           bool distinct,
-                          size_t rank);
+                          size_t rank,
+                          size_t epoch);
     std::vector<float> getBleuAndScore(const std::string& source,
 													size_t sentenceid,
 													float bleuObjectiveWeight,
 													float bleuScoreWeight,
-													bool distinct);
+													bool distinct,
+													size_t rank,
+													size_t epoch);
     size_t getCurrentInputLength();
     void updateHistory(const std::vector<const Moses::Word*>& words);
     void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
@@ -77,13 +80,13 @@ class MosesDecoder {
     std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
     Moses::ScoreComponentCollection getWeights();
     void setWeights(const Moses::ScoreComponentCollection& weights);
-		void cleanup();
+	void cleanup();
 		
 	private:
     float getBleuScore(const Moses::ScoreComponentCollection& scores);
     void setBleuScore(Moses::ScoreComponentCollection& scores, float bleu);
-		Moses::Manager *m_manager;
-		Moses::Sentence *m_sentence;
+    Moses::Manager *m_manager;
+    Moses::Sentence *m_sentence;
     Moses::BleuScoreFeature *m_bleuScoreFeature;
 	
 
diff --git a/mira/Main.cpp b/mira/Main.cpp
index f3f1bb2c5..2f2676b70 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -144,7 +144,6 @@ int main(int argc, char** argv) {
 	size_t weightDumpFrequency;
 	string weightDumpStem;
 	float min_learning_rate;
-	float min_sentence_update;
 	size_t scale_margin;
 	bool scale_update;
 	size_t n;
@@ -153,14 +152,12 @@ int main(int argc, char** argv) {
 	bool onlyViolatedConstraints;
 	bool accumulateWeights;
 	float historySmoothing;
-	bool useScaledReference;
 	bool scaleByInputLength;
 	float slack;
 	float slack_step;
 	float slack_min;
 	bool averageWeights;
 	bool weightConvergence;
-	bool controlUpdates;
 	float learning_rate;
 	float mira_learning_rate;
 	float perceptron_learning_rate;
@@ -168,24 +165,18 @@ int main(int argc, char** argv) {
 	size_t baseOfLog;
 	string decoder_settings;
 	float min_weight_change;
-	float max_sentence_update;
 	float decrease_learning_rate;
-	float decrease_sentence_update;
 	bool devBleu;
 	bool normaliseWeights;
 	bool print_feature_values;
-	bool stop_dev_bleu;
-	bool stop_approx_dev_bleu;
-	bool train_linear_classifier;
-	bool multiplyA;
 	bool historyOf1best;
 	bool burnIn;
 	string burnInInputFile;
 	vector<string> burnInReferenceFiles;
 	bool sentenceLevelBleu;
 	float bleuScoreWeight;
-	float precision;
-	float min_bleu_change;
+	float margin_slack;
+	float margin_slack_incr;
 	bool analytical_update;
 	bool perceptron_update;
 	bool hope_fear;
@@ -204,49 +195,42 @@ int main(int argc, char** argv) {
 		("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
 		("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference file for burn-in phase of BLEU history")
 		("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
-		("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
 		("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
 		("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
 		("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
-		("decr-sentence-update", po::value<float>(&decrease_sentence_update)->default_value(0), "Decrease maximum weight update by the given value after every epoch")
 		("dev-bleu", po::value<bool>(&devBleu)->default_value(true), "Compute BLEU score of oracle translations of the whole tuning set")
 		("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step")
 		("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
-		("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
+		("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
 		("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
 		("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
-		("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
-		("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
+		("history-of-1best", po::value<bool>(&historyOf1best)->default_value(false), "Use the 1best translation to update the history")
+		("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.7), "Adjust the factor for history smoothing")
 		("hope-fear", po::value<bool>(&hope_fear)->default_value(true), "Use only hope and fear translations for optimization (not model)")
 		("hope-n", po::value<int>(&hope_n)->default_value(-1), "Number of hope translations used")
 		("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
 		("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
+		("margin-slack", po::value<float>(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints")
+		("margin-incr", po::value<float>(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount")
 		("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
 		("log-feature-values", po::value<bool>(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.")
-		("min-bleu-change", po::value<float>(&min_bleu_change)->default_value(0), "Minimum BLEU change of 1best translations of one epoch")
-		("min-sentence-update", po::value<float>(&min_sentence_update)->default_value(0), "Set a minimum weight update per sentence")
 		("min-learning-rate", po::value<float>(&min_learning_rate)->default_value(0), "Set a minimum learning rate")
-		("max-sentence-update", po::value<float>(&max_sentence_update)->default_value(-1), "Set a maximum weight update per sentence")
 		("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion")
-		("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
+		("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(5), "How often per epoch to mix weights, when using mpi")
 		("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimization")
-		("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
+		("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in nbest list")
 		("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
 		("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
 		("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
-		("precision", po::value<float>(&precision)->default_value(0), "Precision when comparing left and right hand side of constraints")
 		("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
 		("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
 		("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
-		("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentences level bleu scoring function")
+		("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(true), "Use a sentences level bleu scoring function")
 		("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
 	    ("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
 	    ("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
 	    ("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
-	    ("stop-dev-bleu", po::value<bool>(&stop_dev_bleu)->default_value(false), "Stop when average Bleu (dev) decreases (or no more increases)")
-	    ("stop-approx-dev-bleu", po::value<bool>(&stop_approx_dev_bleu)->default_value(false), "Stop when average approx. sentence Bleu (dev) decreases (or no more increases)")
 	    ("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
-	    ("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
 	    ("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
 	    ("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
 	    ("scale-update", po::value<bool>(&scale_update)->default_value(false), "Scale the update by the Bleu score of the oracle translation")
@@ -255,8 +239,7 @@ int main(int argc, char** argv) {
 	po::options_description cmdline_options;
 	cmdline_options.add(desc);
 	po::variables_map vm;
-	po::store(
-	    po::command_line_parser(argc, argv). options(cmdline_options).run(), vm);
+	po::store(po::command_line_parser(argc, argv). options(cmdline_options).run(), vm);
 	po::notify(vm);
 
 	if (help) {
@@ -329,7 +312,7 @@ int main(int argc, char** argv) {
 	vector<string> decoder_params;
 	boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
 	initMoses(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
-	MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength, historySmoothing);
+	MosesDecoder* decoder = new MosesDecoder(scaleByInputLength, historySmoothing);
 	if (normaliseWeights) {
 		ScoreComponentCollection startWeights = decoder->getWeights();
 		startWeights.L1Normalise();
@@ -353,12 +336,16 @@ int main(int argc, char** argv) {
 	// initialise optimizer
 	Optimiser* optimiser = NULL;
 	if (learner == "mira") {
-		cerr << "Optimising using Mira" << endl;
-		optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, precision);
+		if (rank == 0) {
+			cerr << "Optimising using Mira" << endl;
+		}
+		optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack);
 		learning_rate = mira_learning_rate;
 		perceptron_update = false;
 	} else if (learner == "perceptron") {
-		cerr << "Optimising using Perceptron" << endl;
+		if (rank == 0) {
+			cerr << "Optimising using Perceptron" << endl;
+		}
 		optimiser = new Perceptron();
 		learning_rate = perceptron_learning_rate;
 		perceptron_update = true;
@@ -373,7 +360,7 @@ int main(int argc, char** argv) {
 	// resolve parameter dependencies
 	if (perceptron_update || analytical_update) {
 		batchSize = 1;
-		cerr << "Setting batch size to 1 for perceptron/analytical update" << endl;
+		cerr << "Info: Setting batch size to 1 for perceptron/analytical update" << endl;
 	}
 
 	if (hope_n == -1 && fear_n == -1) {
@@ -385,14 +372,18 @@ int main(int argc, char** argv) {
 		hope_fear = false; // is true by default
 	}
 
+	if (!hope_fear && !analytical_update) {
+		model_hope_fear = true;
+	}
+
 	if (model_hope_fear && analytical_update) {
-		cerr << "Error: must choose between model-hope-fear and analytical update" << endl;
+		cerr << "Error: Must choose between model-hope-fear and analytical update" << endl;
 		return 1;
 	}
 
 	if (burnIn && sentenceLevelBleu) {
 		burnIn = false;
-		cerr << "Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl;
+		cerr << "Info: Burn-in not needed when using sentence-level BLEU, deactivating burn-in." << endl;
 	}
 
 	if (burnIn) {
@@ -436,7 +427,7 @@ int main(int argc, char** argv) {
 			order.push_back(i);
 		}
 
-		cerr << "Rank " << rank << ", starting burn-in phase for approx. BLEU history.." << endl;
+		VERBOSE(1, "Rank " << rank << ", starting burn-in phase for approx. BLEU history.." << endl);
 		if (historyOf1best) {
 			// get 1best translations for the burn-in sentences
 			vector<size_t>::const_iterator sid = order.begin();
@@ -444,7 +435,7 @@ int main(int argc, char** argv) {
 				string& input = burnInInputSentences[*sid];
 				vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
 						featureValues[0], bleuScores[0], true,
-						distinctNbest, rank);
+						distinctNbest, rank, -1);
 				inputLengths.push_back(decoder->getCurrentInputLength());
 				ref_ids.push_back(*sid);
 				decoder->cleanup();
@@ -468,8 +459,7 @@ int main(int argc, char** argv) {
 			while (sid != order.end()) {
 				string& input = burnInInputSentences[*sid];
 				vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
-						featureValues[0], bleuScores[0], true,
-						distinctNbest, rank);
+						featureValues[0], bleuScores[0], true, distinctNbest, rank, -1);
 				inputLengths.push_back(decoder->getCurrentInputLength());
 				ref_ids.push_back(*sid);
 				decoder->cleanup();
@@ -488,7 +478,7 @@ int main(int argc, char** argv) {
 			}
 		}
 
-		cerr << "Bleu feature history after burn-in: " << endl;
+		VERBOSE(1, "Bleu feature history after burn-in: " << endl);
 		decoder->printBleuFeatureHistory(cerr);
 		decoder->loadReferenceSentences(referenceSentences);
 	}
@@ -532,44 +522,28 @@ int main(int argc, char** argv) {
 	size_t numberOfUpdates = 0;
 	size_t numberOfUpdatesThisEpoch = 0;
 
-	time_t now = time(0); // get current time
-	struct tm* tm = localtime(&now); // get struct filled out
-	cerr << "Start date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday << "/"
-	    << tm->tm_year + 1900 << ", " << tm->tm_hour << ":" << tm->tm_min << ":"
-	    << tm->tm_sec << endl;
+	time_t now;
+	time(&now);
+	cerr << "Rank " << rank << ", " << ctime(&now) << endl;
 
 	ScoreComponentCollection mixedAverageWeights;
 	ScoreComponentCollection mixedAverageWeightsPrevious;
 	ScoreComponentCollection mixedAverageWeightsBeforePrevious;
 
-/*	float averageRatio = 0;
-	float averageBleu = 0;
-	float prevAverageBleu = 0;
-	float beforePrevAverageBleu = 0;
-	float summedApproxBleu = 0;
-	float averageApproxBleu = 0;
-	float prevAverageApproxBleu = 0;
-	float beforePrevAverageApproxBleu = 0;*/
 	bool stop = false;
 	int sumStillViolatedConstraints;
 	int sumStillViolatedConstraints_lastEpoch = 0;
 	int sumConstraintChangeAbs;
 	int sumConstraintChangeAbs_lastEpoch = 0;
-	size_t sumBleuChangeAbs;
+//	size_t sumBleuChangeAbs;
 	float *sendbuf, *recvbuf;
 	sendbuf = (float *) malloc(sizeof(float));
 	recvbuf = (float *) malloc(sizeof(float));
-	// Note: make sure that the variable mosesWeights always holds the current decoder weights
 	for (size_t epoch = 0; epoch < epochs && !stop; ++epoch) {
-		cerr << "\nRank " << rank << ", epoch " << epoch << endl;
-
 		// sum of violated constraints
 		sumStillViolatedConstraints = 0;
 		sumConstraintChangeAbs = 0;
-		sumBleuChangeAbs = 0;
-
-		// sum of approx. sentence bleu scores per epoch
-//		summedApproxBleu = 0;
+//		sumBleuChangeAbs = 0;
 
 		numberOfUpdatesThisEpoch = 0;
 		// Sum up weights over one epoch, final average uses weights from last epoch
@@ -601,8 +575,7 @@ int main(int argc, char** argv) {
 
 			// get moses weights
 			ScoreComponentCollection mosesWeights = decoder->getWeights();
-			cerr << "\nRank " << rank << ", next batch" << endl;
-			cerr << "Rank " << rank << ", weights: " << mosesWeights << endl;
+			VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", weights: " << mosesWeights << endl);
 
 			// BATCHING: produce nbest lists for all input sentences in batch
 			vector<float> oracleBleuScores;
@@ -618,8 +591,7 @@ int main(int argc, char** argv) {
 			    != shard.end(); ++batchPosition) {
 				string& input = inputSentences[*sid];
 				const vector<string>& refs = referenceSentences[*sid];
-				cerr << "Rank " << rank << ", batch position " << batchPosition << endl;
-				cerr << "Rank " << rank << ", input sentence " << *sid << ": \"" << input << "\"" << endl;
+				cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;
 
 				vector<ScoreComponentCollection> newFeatureValues;
 				vector<float> newBleuScores;
@@ -640,13 +612,13 @@ int main(int argc, char** argv) {
 				if (perceptron_update || analytical_update) {
 					if (historyOf1best) {
 						// MODEL (for updating the history)
-						cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl;
+						cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl;
 						vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
 								dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
-								distinctNbest, rank);
+								distinctNbest, rank, epoch);
 						decoder->cleanup();
 						oneBests.push_back(bestModel);
-						cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl;
+						VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl);
 					}
 
 					// clear dummies
@@ -658,22 +630,22 @@ int main(int argc, char** argv) {
 					size_t oraclePos = 0;
 					vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
 							featureValuesHope[batchPosition], bleuScoresHope[batchPosition], true,
-							distinctNbest, rank);
+							distinctNbest, rank, epoch);
 					// needed for history
 					inputLengths.push_back(decoder->getCurrentInputLength());
 					ref_ids.push_back(*sid);
 					decoder->cleanup();
 					oracles.push_back(oracle);
-					cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][oraclePos] << endl;
+					VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][oraclePos] << endl);
 
 					// FEAR
 					cerr << "Rank " << rank << ", run decoder to get 1best fear translations" << endl;
 					size_t fearPos = 0;
 					vector<const Word*> fear = decoder->getNBest(input, *sid, 1, -1.0, bleuScoreWeight,
 							featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true,
-							distinctNbest, rank);
+							distinctNbest, rank, epoch);
 					decoder->cleanup();
-					cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][fearPos] << endl;
+					VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][fearPos] << endl);
 					for (size_t i = 0; i < fear.size(); ++i) {
 						delete fear[i];
 					}
@@ -682,37 +654,34 @@ int main(int argc, char** argv) {
 					if (hope_fear) {
 						if (historyOf1best) {
 							// MODEL (for updating the history only, using dummy vectors)
-							cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl;
-							cerr << "dummyFeatureValues.size: " << dummyFeatureValues.size() << endl;
-							cerr << "batch position: " << batchPosition << endl;
+							cerr << "Rank " << rank << ", run decoder to get 1best wrt model score (for history)" << endl;
 							vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
 									dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
-									distinctNbest, rank);
-							cerr << "finished decoding." << endl;
+									distinctNbest, rank, epoch);
 							decoder->cleanup();
 							oneBests.push_back(bestModel);
-							cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl;
+							VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl);
 						}
 
 						// HOPE
 						cerr << "Rank " << rank << ", run decoder to get " << hope_n << "best hope translations" << endl;
 						vector<const Word*> oracle = decoder->getNBest(input, *sid, hope_n, 1.0, bleuScoreWeight,
 										featureValuesHope[batchPosition], bleuScoresHope[batchPosition], true,
-										distinctNbest, rank);
+										distinctNbest, rank, epoch);
 						// needed for history
 						inputLengths.push_back(decoder->getCurrentInputLength());
 						ref_ids.push_back(*sid);
 						decoder->cleanup();
 						oracles.push_back(oracle);
-						cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][0] << endl;
+						VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][0] << endl);
 
 						// FEAR
 						cerr << "Rank " << rank << ", run decoder to get " << fear_n << "best fear translations" << endl;
 						vector<const Word*> fear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuScoreWeight,
 										featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true,
-										distinctNbest, rank);
+										distinctNbest, rank, epoch);
 						decoder->cleanup();
-						cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][0] << endl;
+						VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][0] << endl);
 						for (size_t i = 0; i < fear.size(); ++i) {
 							delete fear[i];
 						}
@@ -722,26 +691,26 @@ int main(int argc, char** argv) {
 						cerr << "Rank " << rank << ", run decoder to get " << n << "best wrt model score" << endl;
 						vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight,
 									featureValues[batchPosition], bleuScores[batchPosition], true,
-									distinctNbest, rank);
+									distinctNbest, rank, epoch);
 						decoder->cleanup();
 						oneBests.push_back(bestModel);
 						// needed for calculating bleu of dev (1best translations) // todo:
 						all_ref_ids.push_back(*sid);
 						allBestModelScore.push_back(bestModel);
-						cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
+						VERBOSE(1, "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl);
 
 						// HOPE
 						cerr << "Rank " << rank << ", run decoder to get " << n << "best hope translations" << endl;
 						size_t oraclePos = featureValues[batchPosition].size();
 						vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight,
 										featureValues[batchPosition], bleuScores[batchPosition], true,
-										distinctNbest, rank);
+										distinctNbest, rank, epoch);
 						// needed for history
 						inputLengths.push_back(decoder->getCurrentInputLength());
 						ref_ids.push_back(*sid);
 						decoder->cleanup();
 						oracles.push_back(oracle);
-						cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;
+						VERBOSE(1, "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl);
 
 						oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
 						oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]);
@@ -751,18 +720,15 @@ int main(int argc, char** argv) {
 						size_t fearPos = featureValues[batchPosition].size();
 						vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight,
 										featureValues[batchPosition], bleuScores[batchPosition], true,
-										distinctNbest, rank);
+										distinctNbest, rank, epoch);
 						decoder->cleanup();
-						cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl;
+						VERBOSE(1, "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl);
 						for (size_t i = 0; i < fear.size(); ++i) {
 							delete fear[i];
 						}
 					}
 				}
 
-//				cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): "  << bleuScores[batchPosition][0] << endl;
-//				summedApproxBleu += bleuScores[batchPosition][0];
-
 				// next input sentence
 				++sid;
 				++actualBatchSize;
@@ -802,14 +768,14 @@ int main(int argc, char** argv) {
 				}
 			}
 
-			// get 1best model results with old weights
+/*			// get 1best model results with old weights
 			vector< vector <float > > bestModelOld_batch;
 			for (size_t i = 0; i < actualBatchSize; ++i) {
 				string& input = inputSentences[*current_sid_start + i];
-				vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
+				vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
 				bestModelOld_batch.push_back(bestModelOld);
 				decoder->cleanup();
-			}
+			}*/
 
 			// optionally print out the feature values
 			if (print_feature_values) {
@@ -840,7 +806,7 @@ int main(int argc, char** argv) {
 			}
 
 			// Run optimiser on batch:
-			cerr << "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl;
+			VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
 			ScoreComponentCollection oldWeights(mosesWeights);
 			vector<int> update_status;
 			if (perceptron_update) {
@@ -848,12 +814,12 @@ int main(int argc, char** argv) {
 				vector<size_t> dummy2;
 				update_status = optimiser->updateWeightsHopeFear(mosesWeights,
 						featureValuesHope, featureValuesFear,	dummy1, dummy1, dummy2,
-						learning_rate, 0, rank, epoch, 0);
+						learning_rate, rank, epoch);
 			}
 			else if (analytical_update) {
 					update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(mosesWeights,
 							featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], bleuScoresFear[0][0],
-							ref_ids[0], learning_rate, max_sentence_update, rank, epoch, controlUpdates);
+							ref_ids[0], learning_rate, rank, epoch);
 			}
 			else {
 				if (hope_fear) {
@@ -884,74 +850,64 @@ int main(int argc, char** argv) {
 
 					update_status = optimiser->updateWeightsHopeFear(mosesWeights,
 							featureValuesHope, featureValuesFear,	bleuScoresHope, bleuScoresFear, ref_ids,
-							learning_rate, max_sentence_update, rank, epoch, controlUpdates);
+							learning_rate, rank, epoch);
 				}
 				else {
 					// model_hope_fear
 					update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, featureValues,
 							losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
-							learning_rate, max_sentence_update, rank, epoch, controlUpdates);
+							learning_rate, rank, epoch);
 				}
 			}
 
-			if (update_status[0] == 1) {
-				cerr << "Rank " << rank << ", epoch " << epoch << ", no update for batch" << endl;
+			sumConstraintChangeAbs += abs(update_status[0] - update_status[1]);
+			sumStillViolatedConstraints += update_status[1];
+
+			// pass new weights to decoder
+			if (normaliseWeights) {
+				mosesWeights.L1Normalise();
 			}
-			else if (update_status[0] == -1) {
-				cerr << "Rank " << rank << ", epoch " << epoch << ", update ignored" << endl;
+
+			cumulativeWeights.PlusEquals(mosesWeights);
+			++numberOfUpdates;
+			++numberOfUpdatesThisEpoch;
+			if (averageWeights) {
+				ScoreComponentCollection averageWeights(cumulativeWeights);
+				if (accumulateWeights) {
+					averageWeights.DivideEquals(numberOfUpdates);
+				} else {
+					averageWeights.DivideEquals(numberOfUpdatesThisEpoch);
+				}
+
+				mosesWeights = averageWeights;
 			}
-			else {
-				sumConstraintChangeAbs += abs(update_status[1] - update_status[2]);
-				sumStillViolatedConstraints += update_status[2];
 
-				// pass new weights to decoder
-				if (normaliseWeights) {
-					mosesWeights.L1Normalise();
-				}
+			// set new Moses weights (averaged or not)
+			decoder->setWeights(mosesWeights);
 
-				cumulativeWeights.PlusEquals(mosesWeights);
-				++numberOfUpdates;
-				++numberOfUpdatesThisEpoch;
-				if (averageWeights) {
-					ScoreComponentCollection averageWeights(cumulativeWeights);
-					if (accumulateWeights) {
-						averageWeights.DivideEquals(numberOfUpdates);
-					} else {
-						averageWeights.DivideEquals(numberOfUpdatesThisEpoch);
-					}
+			// compute difference to old weights
+			ScoreComponentCollection weightDifference(mosesWeights);
+			weightDifference.MinusEquals(oldWeights);
+			VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl);
 
-					mosesWeights = averageWeights;
-					cerr << "Rank " << rank << ", epoch " << epoch << ", set new average weights: " << mosesWeights << endl;
-				}
-				else {
-					cerr << "Rank " << rank << ", epoch " << epoch << ", set new weights: " << mosesWeights << endl;
-				}
-
-				// set new Moses weights (averaged or not)
-				decoder->setWeights(mosesWeights);
-
-				// compute difference to old weights
-				ScoreComponentCollection weightDifference(mosesWeights);
-				weightDifference.MinusEquals(oldWeights);
-				cerr << "Rank " << rank << ", epoch " << epoch << ", weight difference: " << weightDifference << endl;
-
-				// get 1best model results with new weights (for each sentence in batch)
-				vector<float> bestModelNew;
-				for (size_t i = 0; i < actualBatchSize; ++i) {
-					string& input = inputSentences[*current_sid_start + i];
-					bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
-					decoder->cleanup();
-					sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]);
-					cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl;
-					cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl;
-				}
-			}
+/*			// get 1best model results with new weights (for each sentence in batch)
+			vector<float> bestModelNew;
+			for (size_t i = 0; i < actualBatchSize; ++i) {
+				string& input = inputSentences[*current_sid_start + i];
+				bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest, rank, epoch);
+				decoder->cleanup();
+				sumBleuChangeAbs += abs(bestModelOld_batch[i][0] - bestModelNew[0]);
+				VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl);
+				VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl);
+			}*/
 
 			// update history (for approximate document Bleu)
 			if (sentenceLevelBleu) {
 				for (size_t i = 0; i < oracles.size(); ++i) {
-					cerr << "Rank " << rank << ", epoch " << epoch << ", oracle length: " << oracles[i].size() << " ";
-					decoder->printReferenceLength(ref_ids);
+					VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", oracle length: " << oracles[i].size() << " ");
+					if (verbosity > 0) {
+						decoder->printReferenceLength(ref_ids);
+					}
 				}
 			}
 			else {
@@ -1058,16 +1014,17 @@ int main(int argc, char** argv) {
 						cerr << "\nMixed average weights during epoch " << epoch << ": " << mixedAverageWeights << endl;
 					}
 
-					cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl;
+					cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl;
 					mixedAverageWeights.Save(filename.str());
 					++weightEpochDump;
 				}
 			}// end dumping
 		} // end of shard loop, end of this epoch
 
-
-		cerr << "Bleu feature history after epoch " <<  epoch << endl;
-		decoder->printBleuFeatureHistory(cerr);
+		if (verbosity > 0) {
+			cerr << "Bleu feature history after epoch " <<  epoch << endl;
+			decoder->printBleuFeatureHistory(cerr);
+		}
 
 		// Check whether there were any weight updates during this epoch
 		size_t sumUpdates;
@@ -1094,131 +1051,30 @@ int main(int argc, char** argv) {
 
 		if (epoch > 0) {
 			if ((sumConstraintChangeAbs_lastEpoch == sumConstraintChangeAbs) && (sumStillViolatedConstraints_lastEpoch == sumStillViolatedConstraints)) {
-				cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints and constraint changes has stayed the same: " << sumStillViolatedConstraints << ", " <<  sumConstraintChangeAbs << endl;
+				VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints and constraint changes has stayed the same: " << sumStillViolatedConstraints << ", " <<  sumConstraintChangeAbs << endl);
 			}
 			else {
-				cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << ", sum of constraint changes " <<  sumConstraintChangeAbs << endl;
+				VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << ", sum of constraint changes " <<  sumConstraintChangeAbs << endl);
 			}
 		}
 		else {
-			cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl;
+			VERBOSE(2, "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl);
 		}
 
 		sumConstraintChangeAbs_lastEpoch = sumConstraintChangeAbs;
 		sumStillViolatedConstraints_lastEpoch = sumStillViolatedConstraints;
-
-		if (min_bleu_change > 0) {
-			if (sumBleuChangeAbs < min_bleu_change) {
-				cerr << "Rank " << rank << ", epoch " << epoch << ", sum of BLEU score changes was smaller than " << min_bleu_change << " (" << sumBleuChangeAbs << ")." << endl;
-				stop = true;
-			}
-			else {
-				cerr << "Rank " << rank << ", epoch " << epoch << ", sum of BLEU score changes: " << sumBleuChangeAbs << "." << endl;
-			}
-		}
 		
 		if (!stop) {
-/*			if (devBleu) {
-				// calculate bleu score of dev set
-				vector<float> bleuAndRatio = decoder->calculateBleuOfCorpus(allBestModelScore, all_ref_ids, epoch, rank);
-				float bleu = bleuAndRatio[0];
-				float ratio = bleuAndRatio[1];
-
-				for (size_t i = 0; i < allBestModelScore.size(); ++i) {
-					for (size_t j = 0; j < allBestModelScore[i].size(); ++j) {
-						delete allBestModelScore[i][j];
-					}
-				}
-
-				if (rank == 0) {
-					beforePrevAverageBleu = prevAverageBleu;
-					beforePrevAverageApproxBleu = prevAverageApproxBleu;
-					prevAverageBleu = averageBleu;
-					prevAverageApproxBleu = averageApproxBleu;
-				}
-
-#ifdef MPI_ENABLE
-				// average bleu across processes
-				sendbuf[0] = bleu;
-				recvbuf[0] = 0;
-				MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world);
-				if (rank == 0) {
-					averageBleu = recvbuf[0];
-
-					// divide by number of processes
-					averageBleu /= size;
-					cerr << "Average Bleu (dev) after epoch " << epoch << ": " << averageBleu << endl;
-				}
-
-				// average ratio across processes
-				sendbuf[0] = ratio;
-				recvbuf[0] = 0;
-				MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world);
-				if (rank == 0) {
-					averageRatio = recvbuf[0];
-
-					// divide by number of processes
-					averageRatio /= size;
-					cerr << "Average ratio (dev) after epoch " << epoch << ": " << averageRatio << endl;
-				}
-
-				// average approximate sentence bleu across processes
-				sendbuf[0] = summedApproxBleu/numberOfUpdatesThisEpoch;
-				recvbuf[0] = 0;
-				MPI_Reduce(sendbuf, recvbuf, 1, MPI_FLOAT, MPI_SUM, 0, world);
-				if (rank == 0) {
-					averageApproxBleu = recvbuf[0];
-
-					// divide by number of processes
-					averageApproxBleu /= size;
-					cerr << "Average approx. sentence Bleu (dev) after epoch " << epoch << ": " << averageApproxBleu << endl;
-				}
-#endif
-#ifndef MPI_ENABLE
-				averageBleu = bleu;
-				cerr << "Average Bleu (dev) after epoch " << epoch << ": " << averageBleu << endl;
-				averageApproxBleu = summedApproxBleu / numberOfUpdatesThisEpoch;
-				cerr << "Average approx. sentence Bleu (dev) after epoch " << epoch << ": " << averageApproxBleu << endl;
-#endif
-				if (rank == 0) {
-					if (stop_dev_bleu) {
-						if (averageBleu <= prevAverageBleu && prevAverageBleu <= beforePrevAverageBleu) {
-							stop = true;
-							cerr << "Average Bleu (dev) is decreasing or no more increasing.. stop tuning." << endl;
-							ScoreComponentCollection dummy;
-							ostringstream endfilename;
-							endfilename << "stopping";
-							dummy.Save(endfilename.str());
-						}
-					}
-
-					if (stop_approx_dev_bleu) {
-						if (averageApproxBleu <= prevAverageApproxBleu && prevAverageApproxBleu <= beforePrevAverageApproxBleu) {
-							stop = true;
-							cerr << "Average approx. sentence Bleu (dev) is decreasing or no more increasing.. stop tuning." << endl;
-							ScoreComponentCollection dummy;
-							ostringstream endfilename;
-							endfilename << "stopping";
-							dummy.Save(endfilename.str());
-						}
-					}
-				}
-
-#ifdef MPI_ENABLE
-				mpi::broadcast(world, stop, 0);
-#endif
-			} // end if (dev_bleu) */
-
 			// Test if weights have converged
 			if (weightConvergence) {
 				bool reached = true;
 				if (rank == 0 && (epoch >= 2)) {
 					ScoreComponentCollection firstDiff(mixedAverageWeights);
 					firstDiff.MinusEquals(mixedAverageWeightsPrevious);
-					cerr << "Average weight changes since previous epoch: " << firstDiff << endl;
+					VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl);
 					ScoreComponentCollection secondDiff(mixedAverageWeights);
 					secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
-					cerr << "Average weight changes since before previous epoch: " << secondDiff << endl << endl;
+					VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl);
 
 					// check whether stopping criterion has been reached
 					// (both difference vectors must have all weight changes smaller than min_weight_change)
@@ -1240,7 +1096,7 @@ int main(int argc, char** argv) {
 					if (reached) {
 						// stop MIRA
 						stop = true;
-						cerr << "Stopping criterion has been reached after epoch " << epoch << ".. stopping MIRA." << endl;
+						cerr << "\nWeights have converged after epoch " << epoch << ".. stopping MIRA." << endl;
 						ScoreComponentCollection dummy;
 						ostringstream endfilename;
 						endfilename << "stopping";
@@ -1255,17 +1111,26 @@ int main(int argc, char** argv) {
 #endif
 			} //end if (weightConvergence)
 
-			// if using flexible regularization, decrease regularization parameter for next epoch
+			// if using flexible slack, decrease slack parameter for next epoch
 			if (slack_step > 0) {
 				if (slack - slack_step >= slack_min) {
 					if (typeid(*optimiser) == typeid(MiraOptimiser)) {
 						slack -= slack_step;
-						cerr << "Change slack to: " << slack << endl;
+						VERBOSE(1, "Change slack to: " << slack << endl);
 						((MiraOptimiser*) optimiser)->setSlack(slack);
 					}
 				}
 			}
 
+			// if using flexible margin slack, increase margin slack parameter for next epoch
+			if (margin_slack_incr > 0.0001) {
+				if (typeid(*optimiser) == typeid(MiraOptimiser)) {
+					margin_slack += margin_slack_incr;
+					VERBOSE(1, "Change margin slack to: " << margin_slack << endl);
+					((MiraOptimiser*) optimiser)->setMarginSlack(margin_slack);
+				}
+			}
+
 			// change learning rate
 			if ((decrease_learning_rate > 0) && (learning_rate - decrease_learning_rate >= min_learning_rate)) {
 				learning_rate -= decrease_learning_rate;
@@ -1276,20 +1141,7 @@ int main(int argc, char** argv) {
 					mpi::broadcast(world, stop, 0);
 #endif
 				}
-				cerr << "Change learning rate to " << learning_rate << endl;
-			}
-
-			// change maximum sentence update
-			if ((decrease_sentence_update > 0) && (max_sentence_update - decrease_sentence_update >= min_sentence_update)) {
-				max_sentence_update -= decrease_sentence_update;
-				if (max_sentence_update <= 0.0001) {
-					max_sentence_update = 0;
-					stop = true;
-#ifdef MPI_ENABLE
-					mpi::broadcast(world, stop, 0);
-#endif
-				}
-				cerr << "Change maximum sentence update to " << max_sentence_update << endl;
+				VERBOSE(1, "Change learning rate to " << learning_rate << endl);
 			}
 		}
 	} // end of epoch loop
@@ -1298,11 +1150,8 @@ int main(int argc, char** argv) {
 	MPI_Finalize();
 #endif
 
-	now = time(0); // get current time
-	tm = localtime(&now); // get struct filled out
-	cerr << "\nEnd date/time: " << tm->tm_mon + 1 << "/" << tm->tm_mday
-			<< "/" << tm->tm_year + 1900 << ", " << tm->tm_hour << ":"
-			<< tm->tm_min << ":" << tm->tm_sec << endl;
+	time(&now);
+	cerr << "Rank " << rank << ", " << ctime(&now);
 
 	delete decoder;
 	exit(0);
diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp
index 5231d380d..2447b6787 100644
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@@ -1,5 +1,6 @@
 #include "Optimiser.h"
 #include "Hildreth.h"
+#include "StaticData.h"
 
 using namespace Moses;
 using namespace std;
@@ -14,10 +15,8 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
     const vector<float> oracleBleuScores,
     const vector<size_t> sentenceIds,
     float learning_rate,
-    float max_sentence_update,
     size_t rank,
-    size_t epoch,
-    bool controlUpdates) {
+    size_t epoch) {
 
 	// vector of feature values differences for all created constraints
 	vector<ScoreComponentCollection> featureValueDiffs;
@@ -40,41 +39,44 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 			ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
 			featureValueDiff.MinusEquals(featureValues[i][j]);
 
-			cerr << "feature value diff: " << featureValueDiff << endl;
+			cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl;
 			if (featureValueDiff.GetL1Norm() == 0) {
-				cerr << "Equal feature values, constraint skipped.." << endl;
+				// skip constraint
 				continue;
 			}
 
 			float loss = losses[i][j];
 		    if (m_scale_margin == 1) {
 		    	loss *= oracleBleuScores[i];
-		    	cerr << "Scaling margin with oracle bleu score "  << oracleBleuScores[i] << endl;
+		    	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score "  << oracleBleuScores[i] << endl);
 		    }
 		    else if (m_scale_margin == 2) {
 		    	loss *= log2(oracleBleuScores[i]);
-		    	cerr << "Scaling margin with log2 oracle bleu score "  << log2(oracleBleuScores[i]) << endl;
+		    	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log2 oracle bleu score "  << log2(oracleBleuScores[i]) << endl);
 		    }
 		    else if (m_scale_margin == 10) {
 		    	loss *= log10(oracleBleuScores[i]);
-		    	cerr << "Scaling margin with log10 oracle bleu score "  << log10(oracleBleuScores[i]) << endl;
+		    	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log10 oracle bleu score "  << log10(oracleBleuScores[i]) << endl);
 		    }
 
 		  	// check if constraint is violated
 		    bool violated = false;
 		    bool addConstraint = true;
 		    float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
-		    float diff = loss - (modelScoreDiff + m_precision);
-		    cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
+		    float diff = 0;
+			if (loss > (modelScoreDiff + m_margin_slack)) {
+				diff = loss - (modelScoreDiff + m_margin_slack);
+			}
+			cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " <<  m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
+
 		    if (diff > epsilon) {
 		    	violated = true;
-		    	cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl;
 		    }
 		    else if (m_onlyViolatedConstraints) {
 		    	addConstraint = false;
 			}
 
-		    float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision);
+		    float lossMinusModelScoreDiff = loss - modelScoreDiff;
 		    if (addConstraint) {
 		    	featureValueDiffs.push_back(featureValueDiff);
 		    	lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
@@ -92,8 +94,8 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	vector<float> alphas;
 	ScoreComponentCollection summedUpdate;
 	if (violatedConstraintsBefore > 0) {
-	  cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << featureValueDiffs.size() << endl;
-	  cerr << "Rank " << rank << ", epoch " << epoch << ", number of violated constraints passed to optimizer: " << violatedConstraintsBefore << endl;
+	  cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " <<
+			  featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl;
 	  if (m_slack != 0) {
 	    alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack);
 	  } else {
@@ -104,7 +106,7 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	  // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis))
 	  for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
 	  	float alpha = alphas[k];
-	  	cerr << "alpha: " << alpha << endl;
+	  	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl);
 	  	ScoreComponentCollection update(featureValueDiffs[k]);
 	    update.MultiplyEquals(alpha);
 	    
@@ -113,11 +115,10 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	  }
 	} 
 	else {
-	  cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
-	  vector<int> status(3);
-	  status[0] = 1;
+		cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
+	  vector<int> status(2);
+	  status[0] = 0;
 	  status[1] = 0;
-	  status[2] = 0;
 	  return status;
 	}
 
@@ -130,56 +131,37 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
 		float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
 		float loss = all_losses[i];
-		float diff = loss - (modelScoreDiff + m_precision);
+		float diff = loss - (modelScoreDiff + m_margin_slack);
 		if (diff > epsilon) {
 			++violatedConstraintsAfter;
 			newDistanceFromOptimum += diff;
 		}
 	}
-	cerr << "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter  << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl;
-	cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl;
-
-	if (controlUpdates && violatedConstraintsAfter > 0) {
-		float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
-		if ((violatedConstraintsBefore - violatedConstraintsAfter) <= 0 && distanceChange < 0) {
-			vector<int> statusPlus(3);
-			statusPlus[0] = -1;
-			statusPlus[1] = -1;
-			statusPlus[2] = -1;
-			return statusPlus;
-		}
-	}
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter  << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);
 
 	// apply learning rate
 	if (learning_rate != 1) {
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl;
+		VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
 		summedUpdate.MultiplyEquals(learning_rate);
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl;
-	}
-
-	// apply threshold scaling
-	if (max_sentence_update != -1) {
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update before scaling to max-sentence-update: " << summedUpdate << endl;
-		summedUpdate.ThresholdScaling(max_sentence_update);
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update after scaling to max-sentence-update: " << summedUpdate << endl;
+		VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl);
 	}
 
 	// scale update by BLEU of oracle
 	if (oracleBleuScores.size() == 1 && m_scale_update) {
-		cerr << "Scaling summed update with log10 oracle bleu score " << log10(oracleBleuScores[0]) << endl;
+		VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling summed update with log10 oracle bleu score " << log10(oracleBleuScores[0]) << endl);
 		summedUpdate.MultiplyEquals(log10(oracleBleuScores[0]));
 	}
 
 	// apply update to weight vector
-	cerr << "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl;
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
 	currWeights.PlusEquals(summedUpdate);
-	cerr << "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl;
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);
 
-	vector<int> statusPlus(3);
-	statusPlus[0] = 0;
-	statusPlus[1] = violatedConstraintsBefore;
-	statusPlus[2] = violatedConstraintsAfter;
-	return statusPlus;
+	vector<int> status(2);
+	status[0] = violatedConstraintsBefore;
+	status[1] = violatedConstraintsAfter;
+	return status;
 }
 
 vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
@@ -189,10 +171,8 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
 		const std::vector<std::vector<float> >& bleuScoresFear,
 		const std::vector< size_t> sentenceIds,
 		float learning_rate,
-		float max_sentence_update,
 		size_t rank,
-		size_t epoch,
-		bool controlUpdates) {
+		size_t epoch) {
 
 	// vector of feature values differences for all created constraints
 	vector<ScoreComponentCollection> featureValueDiffs;
@@ -216,41 +196,44 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
 			for (size_t k = 0; k < featureValuesFear[i].size(); ++k) {
 				ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
 				featureValueDiff.MinusEquals(featureValuesFear[i][k]);
-				cerr << "feature value diff: " << featureValueDiff << endl;
+				cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl;
 				if (featureValueDiff.GetL1Norm() == 0) {
-					cerr << "Equal feature values, constraint skipped.." << endl;
+					// skip constraint
 					continue;
 				}
 
 				float loss = bleuScoresHope[i][j] - bleuScoresFear[i][k];
 				if (m_scale_margin == 1) {
 					loss *= bleuScoresHope[i][j];
-					cerr << "Scaling margin with oracle bleu score "  << bleuScoresHope[i][j] << endl;
+					VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score "  << bleuScoresHope[i][j] << endl);
 				}
 				else if (m_scale_margin == 2) {
 					loss *= log2(bleuScoresHope[i][j]);
-					cerr << "Scaling margin with log2 oracle bleu score "  << log2(bleuScoresHope[i][j]) << endl;
+					VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log2 oracle bleu score "  << log2(bleuScoresHope[i][j]) << endl);
 				}
 				else if (m_scale_margin == 10) {
 					loss *= log10(bleuScoresHope[i][j]);
-					cerr << "Scaling margin with log10 oracle bleu score "  << log10(bleuScoresHope[i][j]) << endl;
+					VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling margin with log10 oracle bleu score "  << log10(bleuScoresHope[i][j]) << endl);
 				}
 
 				// check if constraint is violated
 				bool violated = false;
 				bool addConstraint = true;
 				float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
-				float diff = loss - (modelScoreDiff + m_precision);
-				cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
+				float diff = 0;
+				if (loss > (modelScoreDiff + m_margin_slack)) {
+					diff = loss - (modelScoreDiff + m_margin_slack);
+				}
+				cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
+
 				if (diff > epsilon) {
 					violated = true;
-					cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl;
 				}
 				else if (m_onlyViolatedConstraints) {
 					addConstraint = false;
 				}
 
-				float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision);
+				float lossMinusModelScoreDiff = loss - modelScoreDiff;
 				if (addConstraint) {
 					featureValueDiffs.push_back(featureValueDiff);
 					lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
@@ -269,8 +252,8 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
 	vector<float> alphas;
 	ScoreComponentCollection summedUpdate;
 	if (violatedConstraintsBefore > 0) {
-	  cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << featureValueDiffs.size() << endl;
-	  cerr << "Rank " << rank << ", epoch " << epoch << ", number of violated constraints passed to optimizer: " << violatedConstraintsBefore << endl;
+	  cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " <<
+			  featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl;
 	  if (m_slack != 0) {
 	    alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack);
 	  } else {
@@ -281,17 +264,17 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
 	  // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis))
 	  for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
 	  	float alpha = alphas[k];
-	  	cerr << "alpha: " << alpha << endl;
+	  	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl);
 	  	ScoreComponentCollection update(featureValueDiffs[k]);
 	    update.MultiplyEquals(alpha);
 
 	  	// scale update by BLEU of hope translation (only two cases defined at the moment)
 	    if (featureValuesHope.size() == 1 && m_scale_update) { // only defined for batch size 1)
 	    	if (featureValuesHope[0].size() == 1) {
-	    		cerr << "Scaling update with log10 oracle bleu score "  << log10(bleuScoresHope[0][0]) << endl; // only 1 oracle
+	    		VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling update with log10 oracle bleu score "  << log10(bleuScoresHope[0][0]) << endl); // only 1 oracle
 	    		update.MultiplyEquals(log10(bleuScoresHope[0][0]));
 	    	} else if (featureValuesFear[0].size() == 1) {
-	    		cerr << "Scaling update with log10 oracle bleu score "  << log10(bleuScoresHope[0][k]) << endl; // k oracles
+	    		VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", scaling update with log10 oracle bleu score "  << log10(bleuScoresHope[0][k]) << endl); // k oracles
 	    		update.MultiplyEquals(log10(bleuScoresHope[0][k]));
 			}
 		}
@@ -301,11 +284,10 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
 	  }
 	}
 	else {
-	  cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
-	  vector<int> status(3);
-	  status[0] = 1;
+		cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
+	  vector<int> status(2);
+	  status[0] = 0;
 	  status[1] = 0;
-	  status[2] = 0;
 	  return status;
 	}
 
@@ -318,49 +300,30 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
 	for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
 		float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
 		float loss = all_losses[i];
-		float diff = loss - (modelScoreDiff + m_precision);
+		float diff = loss - (modelScoreDiff + m_margin_slack);
 		if (diff > epsilon) {
 			++violatedConstraintsAfter;
 			newDistanceFromOptimum += diff;
 		}
 	}
-	cerr << "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter  << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl;
-	cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl;
-
-	if (controlUpdates && violatedConstraintsAfter > 0) {
-		float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
-		if ((violatedConstraintsBefore - violatedConstraintsAfter) <= 0 && distanceChange < 0) {
-			vector<int> statusPlus(3);
-			statusPlus[0] = -1;
-			statusPlus[1] = -1;
-			statusPlus[2] = -1;
-			return statusPlus;
-	  }
-	}
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter  << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl);
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);
 
 	// Apply learning rate (fixed or flexible)
 	if (learning_rate != 1) {
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl;
+		VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl);
 		summedUpdate.MultiplyEquals(learning_rate);
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl;
-	}
-
-	// Apply threshold scaling
-	if (max_sentence_update != -1) {
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update before scaling to max-sentence-update: " << summedUpdate << endl;
-		summedUpdate.ThresholdScaling(max_sentence_update);
-		cerr << "Rank " << rank << ", epoch " << epoch << ", update after scaling to max-sentence-update: " << summedUpdate << endl;
+		VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl);
 	}
 
 	// apply update to weight vector
-	cerr << "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl;
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
 	currWeights.PlusEquals(summedUpdate);
-	cerr << "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl;
+	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);
 
-	vector<int> statusPlus(3);
-	statusPlus[0] = 0;
-	statusPlus[1] = violatedConstraintsBefore;
-	statusPlus[2] = violatedConstraintsAfter;
+	vector<int> statusPlus(2);
+	statusPlus[0] = violatedConstraintsBefore;
+	statusPlus[1] = violatedConstraintsAfter;
 	return statusPlus;
 }
 
@@ -371,26 +334,27 @@ vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c
     float bleuScoreFear,
     size_t sentenceId,
     float learning_rate,
-    float max_sentence_update,
     size_t rank,
-    size_t epoch,
-    bool controlUpdates) {
+    size_t epoch) {
 
   float epsilon = 0.0001;
   float oldDistanceFromOptimum = 0;
   bool constraintViolatedBefore = false;
   ScoreComponentCollection weightUpdate;
 
-  cerr << "hope: " << featureValuesHope << endl;
-  cerr << "fear: " << featureValuesFear << endl;
+ // cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
+ // cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
   ScoreComponentCollection featureValueDiff = featureValuesHope;
   featureValueDiff.MinusEquals(featureValuesFear);
-  cerr << "hope - fear: " << featureValueDiff << endl;
+  cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
   float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
   float loss = bleuScoreHope - bleuScoreFear;
-  float diff = loss - (modelScoreDiff + m_precision);
-  // approximate comparison between floats
-	cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
+  float diff = 0;
+  if (loss > (modelScoreDiff + m_margin_slack)) {
+	  diff = loss - (modelScoreDiff + m_margin_slack);
+  }
+  cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
+
   if (diff > epsilon) {
     // constraint violated
     oldDistanceFromOptimum += diff;
@@ -417,17 +381,16 @@ vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c
     	weightUpdate.PlusEquals(featureValueDiff);
     }
     else {
-    	cerr << "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0, can only happen if oracle == hypothesis, are bleu scores equal as well?" << endl;
+    	VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", no update because squared norm is 0" << endl);
     }
   }
 
   if (!constraintViolatedBefore) {
     // constraint satisfied, nothing to do
-    cerr << "Rank " << rank << ", epoch " << epoch << ", check, constraint already satisfied" << endl;
-    vector<int> status(3);
-    status[0] = 1;
+	cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl;
+    vector<int> status(2);
+    status[0] = 0;
     status[1] = 0;
-    status[2] = 0;
     return status;
   }
 
@@ -439,35 +402,25 @@ vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& c
   featureValueDiff = featureValuesHope;
   featureValueDiff.MinusEquals(featureValuesFear);
   modelScoreDiff = featureValueDiff.InnerProduct(newWeights);
-  diff = loss - (modelScoreDiff + m_precision);
+  diff = loss - (modelScoreDiff + m_margin_slack);
   // approximate comparison between floats!
   if (diff > epsilon) {
     constraintViolatedAfter = true;
-    newDistanceFromOptimum += (loss - (modelScoreDiff + m_precision));
+    newDistanceFromOptimum += (loss - modelScoreDiff);
   }
 
-  cerr << "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl;
-  cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl;
-
-  float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
-  if (controlUpdates && constraintViolatedAfter && distanceChange < 0) {
-    vector<int> statusPlus(3);
-    statusPlus[0] = -1;
-    statusPlus[1] = 1;
-    statusPlus[2] = 1;
-    return statusPlus;
-  }
+  VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl);
+  VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);
 
   // apply update to weight vector
-  cerr << "Rank " << rank << ", weights before update: " << currWeights << endl;
+  VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl);
   currWeights.PlusEquals(weightUpdate);
-  cerr << "Rank " << rank << ", weights after update: " << currWeights << endl;
+  VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl);
 
-  vector<int> statusPlus(3);
-  statusPlus[0] = 0;
-  statusPlus[1] = 1;
-  statusPlus[2] = constraintViolatedAfter ? 1 : 0;
-  return statusPlus;
+  vector<int> status(2);
+  status[0] = 1;
+  status[1] = constraintViolatedAfter ? 1 : 0;
+  return status;
 }
 
 }
diff --git a/mira/Optimiser.h b/mira/Optimiser.h
index 025faca1a..18b75fb65 100644
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@@ -36,11 +36,9 @@ namespace Mira {
 				  const std::vector<std::vector<float> >& bleuScoresHope,
 				  const std::vector<std::vector<float> >& bleuScoresFear,
 				  const std::vector< size_t> sentenceIds,
-					float learning_rate,
-					float max_sentence_update,
-					size_t rank,
-					size_t epoch,
-					bool controlUpdates) = 0;
+				  float learning_rate,
+				  size_t rank,
+				  size_t epoch) = 0;
   };
  
   class Perceptron : public Optimiser {
@@ -52,10 +50,8 @@ namespace Mira {
 					const std::vector<std::vector<float> >& bleuScoresFear,
 					const std::vector< size_t> sentenceIds,
 					float learning_rate,
-  				float max_sentence_update,
-  				size_t rank,
-  				size_t epoch,
-  				bool controlUpdates);
+					size_t rank,
+					size_t epoch);
   };
 
   class MiraOptimiser : public Optimiser {
@@ -63,13 +59,13 @@ namespace Mira {
 	  MiraOptimiser() :
 		  Optimiser() { }
 
-	  MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, float precision) :
+	  MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, float margin_slack) :
 		  Optimiser(),
 		  m_onlyViolatedConstraints(onlyViolatedConstraints),
 		  m_slack(slack),
 		  m_scale_margin(scale_margin),
 		  m_scale_update(scale_update),
-		  m_precision(precision) { }
+		  m_margin_slack(margin_slack) { }
    
      std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
 							Moses::ScoreComponentCollection& featureValuesHope,
@@ -78,10 +74,8 @@ namespace Mira {
 							float bleuScoresFear,
 							size_t sentenceId,
 							float learning_rate,
-							float max_sentence_update,
 							size_t rank,
-							size_t epoch,
-							bool controlUpdates);
+							size_t epoch);
      std::vector<int> updateWeights(Moses::ScoreComponentCollection& currWeights,
       						  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
       						  const std::vector<std::vector<float> >& losses,
@@ -89,27 +83,27 @@ namespace Mira {
       						  const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
       						  const std::vector< float> oracleBleuScores,
       						  const std::vector< size_t> sentenceIds,
-										float learning_rate,
-										float max_sentence_update,
-										size_t rank,
-										size_t epoch,
-										bool controlUpdates);
+      						  float learning_rate,
+      						  size_t rank,
+      						  size_t epoch);
      virtual std::vector<int> updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
       						  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
       						  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
       						  const std::vector<std::vector<float> >& bleuScoresHope,
       						  const std::vector<std::vector<float> >& bleuScoresFear,
       						  const std::vector< size_t> sentenceIds,
-										float learning_rate,
-										float max_sentence_update,
-										size_t rank,
-										size_t epoch,
-										bool controlUpdates);
+      						  float learning_rate,
+      						  size_t rank,
+      						  size_t epoch);
 
      void setSlack(float slack) {
     	 m_slack = slack;
      }
 
+     void setMarginSlack(float margin_slack) {
+    	 m_margin_slack = margin_slack;
+     }
+
    private:
 
       // add only violated constraints to the optimisation problem
@@ -123,7 +117,7 @@ namespace Mira {
       // scale update with log 10 of oracle BLEU score
       bool m_scale_update;
 
-      float m_precision;
+      float m_margin_slack;
   };
 }
 
diff --git a/mira/Perceptron.cpp b/mira/Perceptron.cpp
index f8ea0ce45..315281410 100644
--- a/mira/Perceptron.cpp
+++ b/mira/Perceptron.cpp
@@ -31,10 +31,8 @@ vector<int> Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeig
 		const vector< vector<float> >& dummy2,
 		const vector< size_t> dummy3,
 		float perceptron_learning_rate,
-		float dummy4,
 		size_t rank,
-		size_t epoch,
-		bool dummy5)
+		size_t epoch)
 {
 	cerr << "hope: " << featureValuesHope[0][0] << endl;
 	cerr << "fear: " << featureValuesFear[0][0] << endl;