remove multiple oracles, remove accumulating constraints

git-svn-id: http://svn.statmt.org/repository/mira@3908 cc96ff50-19ce-11e0-b349-13d7f0bd23df
ehasler 2011-06-10 14:14:40 +00:00 committed by Ondrej Bojar
parent 72da32f0cb
commit 4e0f848d50
3 changed files with 104 additions and 394 deletions


@@ -120,34 +120,6 @@ struct RandomIndex {
}
};
void shuffleInput(vector<size_t>& order, size_t size, size_t inputSize) {
cerr << "Shuffling input examples.." << endl;
// RandomIndex rindex;
// random_shuffle(order.begin(), order.end(), rindex);
// remove first element and put it in the back
size_t first = order.at(0);
size_t index = 0;
order.erase(order.begin());
order.push_back(first);
}
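Note that the random_shuffle call above is commented out, so despite the log message this helper currently only rotates the first index to the back of the order. A re-enabled shuffle would look roughly like the sketch below (RandomIndex is the functor from the struct at the top of this hunk, assumed to return a random index below its argument):

// Hypothetical re-enabled shuffle, using the RandomIndex functor defined above.
void shuffleInputRandomly(vector<size_t>& order) {
	RandomIndex rindex;                                  // assumed: operator()(n) returns a value in [0, n)
	random_shuffle(order.begin(), order.end(), rindex);
}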
void createShard(vector<size_t>& order, size_t size, size_t rank, vector<size_t>& shard) {
// Create the shards according to the number of processes used
float shardSize = (float) (order.size()) / size;
size_t shardStart = (size_t) (shardSize * rank);
size_t shardEnd = (size_t) (shardSize * (rank + 1));
if (rank == size - 1)
shardEnd = order.size();
shard.resize(shardEnd - shardStart); // the last rank may receive more than floor(shardSize) elements
copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());
cerr << "order: ";
for (size_t i = 0; i < shard.size(); ++i) {
cerr << shard[i] << " ";
}
cerr << endl;
}
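As a quick illustration of the partitioning above, here is a standalone sketch that splits ten indices over three ranks exactly as createShard does (the sizes are made up; note how the last rank absorbs the remainder):

#include <iostream>
#include <vector>
using namespace std;

int main() {
	vector<size_t> order;
	for (size_t i = 0; i < 10; ++i) order.push_back(i);   // e.g. 10 input sentences
	size_t size = 3;                                       // e.g. 3 MPI processes
	for (size_t rank = 0; rank < size; ++rank) {
		float shardSize = (float) order.size() / size;
		size_t shardStart = (size_t) (shardSize * rank);
		size_t shardEnd = (rank == size - 1) ? order.size() : (size_t) (shardSize * (rank + 1));
		vector<size_t> shard(order.begin() + shardStart, order.begin() + shardEnd);
		cout << "rank " << rank << ": " << shard.size() << " sentences" << endl;   // prints 3, 3, 4
	}
	return 0;
}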
int main(int argc, char** argv) {
size_t rank = 0;
size_t size = 1;
@@ -186,10 +158,7 @@ int main(int argc, char** argv) {
float slack;
float slack_step;
float slack_min;
size_t maxNumberOracles;
bool accumulateMostViolatedConstraints;
bool averageWeights;
bool pastAndCurrentConstraints;
bool weightConvergence;
bool controlUpdates;
float learning_rate;
@@ -225,62 +194,58 @@ int main(int argc, char** argv) {
int fear_n;
po::options_description desc("Allowed options");
desc.add_options()
("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false),"Accumulate most violated constraint per example")
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
("analytical-update", po::value<bool>(&analytical_update)->default_value(0), "Use one best lists and compute the update analytically")
("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for log-ing feature values")
("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments")
("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the bleu objective weight)")
("burn-in", po::value<bool>(&burnIn)->default_value(false), "Do a burn-in of the BLEU history before training")
("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference file for burn-in phase of BLEU history")
("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
("decr-sentence-update", po::value<float>(&decrease_sentence_update)->default_value(0), "Decrease maximum weight update by the given value after every epoch")
("dev-bleu", po::value<bool>(&devBleu)->default_value(true), "Compute BLEU score of oracle translations of the whole tuning set")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("hope-fear", po::value<bool>(&hope_fear)->default_value(true), "Use only hope and fear translations for optimization (not model)")
("hope-n", po::value<int>(&hope_n)->default_value(-1), "Number of hope translations used")
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("log-feature-values", po::value<bool>(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.")
("max-number-oracles", po::value<size_t>(&maxNumberOracles)->default_value(1), "Set a maximum number of oracles to use per example")
("min-bleu-change", po::value<float>(&min_bleu_change)->default_value(0), "Minimum BLEU change of 1best translations of one epoch")
("min-sentence-update", po::value<float>(&min_sentence_update)->default_value(0), "Set a minimum weight update per sentence")
("min-learning-rate", po::value<float>(&min_learning_rate)->default_value(0), "Set a minimum learning rate")
("max-sentence-update", po::value<float>(&max_sentence_update)->default_value(-1), "Set a maximum weight update per sentence")
("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion")
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimization")
("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
("precision", po::value<float>(&precision)->default_value(0), "Precision when comparing left and right hand side of constraints")
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentences level bleu scoring function")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
("analytical-update", po::value<bool>(&analytical_update)->default_value(0), "Use one best lists and compute the update analytically")
("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for log-ing feature values")
("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments")
("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the bleu objective weight)")
("burn-in", po::value<bool>(&burnIn)->default_value(false), "Do a burn-in of the BLEU history before training")
("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference file for burn-in phase of BLEU history")
("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
("decr-sentence-update", po::value<float>(&decrease_sentence_update)->default_value(0), "Decrease maximum weight update by the given value after every epoch")
("dev-bleu", po::value<bool>(&devBleu)->default_value(true), "Compute BLEU score of oracle translations of the whole tuning set")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("hope-fear", po::value<bool>(&hope_fear)->default_value(true), "Use only hope and fear translations for optimization (not model)")
("hope-n", po::value<int>(&hope_n)->default_value(-1), "Number of hope translations used")
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("log-feature-values", po::value<bool>(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.")
("min-bleu-change", po::value<float>(&min_bleu_change)->default_value(0), "Minimum BLEU change of 1best translations of one epoch")
("min-sentence-update", po::value<float>(&min_sentence_update)->default_value(0), "Set a minimum weight update per sentence")
("min-learning-rate", po::value<float>(&min_learning_rate)->default_value(0), "Set a minimum learning rate")
("max-sentence-update", po::value<float>(&max_sentence_update)->default_value(-1), "Set a maximum weight update per sentence")
("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion")
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimization")
("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
("precision", po::value<float>(&precision)->default_value(0), "Precision when comparing left and right hand side of constraints")
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentences level bleu scoring function")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("stop-dev-bleu", po::value<bool>(&stop_dev_bleu)->default_value(false), "Stop when average Bleu (dev) decreases (or no more increases)")
("stop-approx-dev-bleu", po::value<bool>(&stop_approx_dev_bleu)->default_value(false), "Stop when average approx. sentence Bleu (dev) decreases (or no more increases)")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("train-linear-classifier", po::value<bool>(&train_linear_classifier)->default_value(false), "Test algorithm for linear classification")
("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
@@ -389,8 +354,7 @@ int main(int argc, char** argv) {
Optimiser* optimiser = NULL;
if (learner == "mira") {
cerr << "Optimising using Mira" << endl;
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update,
maxNumberOracles, accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size(), precision);
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, precision);
learning_rate = mira_learning_rate;
perceptron_update = false;
} else if (learner == "perceptron") {
@@ -407,12 +371,6 @@ int main(int argc, char** argv) {
}
// resolve parameter dependencies
if (accumulateMostViolatedConstraints && pastAndCurrentConstraints) {
cerr << "Error: the parameters --accumulate-most-violated-constraints and --past-and-current-constraints are mutually exclusive" << endl;
return 1;
}
if (perceptron_update || analytical_update) {
batchSize = 1;
cerr << "Setting batch size to 1 for perceptron/analytical update" << endl;
@@ -542,7 +500,7 @@ int main(int argc, char** argv) {
mpi::broadcast(world, order, 0);
#endif
// Create the shards according to the number of processes used
// Create shards according to the number of processes used
vector<size_t> shard;
float shardSize = (float) (order.size()) / size;
VERBOSE(1, "Shard size: " << shardSize << endl);
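The surrounding code follows the usual master/worker pattern: rank 0 prepares (and optionally rotates or shuffles) the index order, broadcasts it, and every rank then cuts out its own contiguous shard. A condensed, standalone sketch of that flow, assuming Boost.MPI as used here (the vector needs the Boost serialization header to be broadcastable; the sizes are illustrative):

#include <boost/mpi.hpp>
#include <boost/serialization/vector.hpp>
#include <vector>
namespace mpi = boost::mpi;

int main(int argc, char** argv) {
	mpi::environment env(argc, argv);
	mpi::communicator world;
	std::vector<size_t> order;
	if (world.rank() == 0)
		for (size_t i = 0; i < 100; ++i) order.push_back(i);   // illustrative tuning-set size
	mpi::broadcast(world, order, 0);                           // all ranks now hold the same order
	// each rank then slices out [shardSize*rank, shardSize*(rank+1)), as in createShard above
	float shardSize = (float) order.size() / world.size();
	size_t start = (size_t) (shardSize * world.rank());
	size_t end = (world.rank() == world.size() - 1) ? order.size() : (size_t) (shardSize * (world.rank() + 1));
	std::vector<size_t> shard(order.begin() + start, order.begin() + end);
	return 0;
}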


@@ -19,43 +19,6 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
size_t epoch,
bool controlUpdates) {
// add every oracle in batch to list of oracles (under certain conditions)
for (size_t i = 0; i < oracleFeatureValues.size(); ++i) {
float newWeightedScore = oracleFeatureValues[i].InnerProduct(currWeights);
size_t sentenceId = sentenceIds[i];
// compare new oracle with existing oracles:
// if same translation exists, just update the bleu score
// if not, add the oracle
bool updated = false;
size_t indexOfWorst = 0;
float worstWeightedScore = 0;
for (size_t j = 0; j < m_oracles[sentenceId].size(); ++j) {
float currentWeightedScore = m_oracles[sentenceId][j].InnerProduct(currWeights);
if (currentWeightedScore == newWeightedScore) {
cerr << "Rank " << rank << ", epoch " << epoch << ", bleu score of oracle updated at batch position " << i << ", " << m_bleu_of_oracles[sentenceId][j] << " --> " << oracleBleuScores[j] << endl;
m_bleu_of_oracles[sentenceId][j] = oracleBleuScores[j];
updated = true;
break;
} else if (worstWeightedScore == 0 || currentWeightedScore
> worstWeightedScore) {
worstWeightedScore = currentWeightedScore;
indexOfWorst = j;
}
}
if (!updated) {
// add if number of maximum oracles not exceeded, otherwise override the worst
if (m_max_number_oracles > m_oracles[sentenceId].size()) {
m_oracles[sentenceId].push_back(oracleFeatureValues[i]);
m_bleu_of_oracles[sentenceId].push_back(oracleBleuScores[i]);
} else {
m_oracles[sentenceId][indexOfWorst] = oracleFeatureValues[i];
m_bleu_of_oracles[sentenceId][indexOfWorst] = oracleBleuScores[i];
}
}
}
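The block above (removed by this commit) implemented a small per-sentence cache of oracles: an incoming oracle either refreshes the BLEU score of an equal-scoring stored oracle or, once the cache is full, overwrites the entry with the "worst" weighted score. A standalone sketch of that replacement policy, with plain floats standing in for feature vectors (illustrative only, since the logic is gone after this change):

#include <cstddef>
#include <iostream>
#include <vector>
using namespace std;

// Toy version: scores[] plays the role of the oracles' weighted model scores,
// bleus[] the role of m_bleu_of_oracles, and maxOracles of m_max_number_oracles.
void addOracle(vector<float>& scores, vector<float>& bleus, float newScore, float newBleu, size_t maxOracles) {
	size_t indexOfWorst = 0;
	float worstScore = 0;
	for (size_t j = 0; j < scores.size(); ++j) {
		if (scores[j] == newScore) {                       // same weighted score: treat as the same translation, refresh its BLEU
			bleus[j] = newBleu;
			return;
		}
		if (worstScore == 0 || scores[j] > worstScore) {   // "worst" is the highest weighted score, as in the removed code
			worstScore = scores[j];
			indexOfWorst = j;
		}
	}
	if (scores.size() < maxOracles) {                      // room left: append
		scores.push_back(newScore);
		bleus.push_back(newBleu);
	} else {                                               // cache full: overwrite the "worst" entry
		scores[indexOfWorst] = newScore;
		bleus[indexOfWorst] = newBleu;
	}
}

int main() {
	vector<float> scores, bleus;
	addOracle(scores, bleus, 1.5f, 22.0f, 2);
	addOracle(scores, bleus, 0.8f, 20.0f, 2);
	addOracle(scores, bleus, 2.1f, 25.0f, 2);              // cache full: replaces the entry with score 1.5
	cout << scores.size() << " oracles kept" << endl;      // prints "2 oracles kept"
	return 0;
}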
// vector of feature values differences for all created constraints
vector<ScoreComponentCollection> featureValueDiffs;
vector<float> lossMinusModelScoreDiffs;
@@ -63,7 +26,6 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
// most violated constraint in batch
ScoreComponentCollection max_batch_featureValueDiff;
float max_batch_loss = -1;
float max_batch_lossMinusModelScoreDiff = -1;
// Make constraints for new hypothesis translations
@@ -72,21 +34,19 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
float oldDistanceFromOptimum = 0;
// iterate over input sentences (1 (online) or more (batch))
for (size_t i = 0; i < featureValues.size(); ++i) {
size_t sentenceId = sentenceIds[i];
if (m_oracles[sentenceId].size() > 1)
cerr << "Rank " << rank << ", available oracles for source sentence " << sentenceId << ": " << m_oracles[sentenceId].size() << endl;
//size_t sentenceId = sentenceIds[i];
// iterate over hypothesis translations for one input sentence
for (size_t j = 0; j < featureValues[i].size(); ++j) {
for (size_t k = 0; k < m_oracles[sentenceId].size(); ++k) {
ScoreComponentCollection featureValueDiff = m_oracles[sentenceId][k];
featureValueDiff.MinusEquals(featureValues[i][j]);
cerr << "feature value diff: " << featureValueDiff << endl;
if (featureValueDiff.GetL1Norm() == 0) {
cerr << "Equal feature values, constraint skipped.." << endl;
continue;
}
ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
featureValueDiff.MinusEquals(featureValues[i][j]);
float loss = losses[i][j];
cerr << "feature value diff: " << featureValueDiff << endl;
if (featureValueDiff.GetL1Norm() == 0) {
cerr << "Equal feature values, constraint skipped.." << endl;
continue;
}
float loss = losses[i][j];
if (m_scale_margin == 1) {
loss *= oracleBleuScores[i];
cerr << "Scaling margin with oracle bleu score " << oracleBleuScores[i] << endl;
@@ -101,128 +61,31 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float diff = loss - (modelScoreDiff + m_precision);
cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
if (diff > epsilon) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision);
if (violated) {
if (m_accumulateMostViolatedConstraints || m_pastAndCurrentConstraints) {
// find the most violated constraint per batch
if (lossMinusModelScoreDiff > max_batch_lossMinusModelScoreDiff) {
max_batch_lossMinusModelScoreDiff = lossMinusModelScoreDiff;
max_batch_featureValueDiff = featureValueDiff;
max_batch_loss = loss;
}
}
}
if (addConstraint && !m_accumulateMostViolatedConstraints) {
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
all_losses.push_back(loss);
if (violated) {
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}
}
}
}
}
if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of current constraints: " << featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of current violated constraints: " << violatedConstraintsBefore << endl;
}
if (m_max_number_oracles == 1) {
for (size_t k = 0; k < sentenceIds.size(); ++k) {
size_t sentenceId = sentenceIds[k];
m_oracles[sentenceId].clear();
}
}
size_t pastViolatedConstraints = 0;
// Add constraints from past iterations (BEFORE updating that list)
if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
// add all past (most violated) constraints to the list of current constraints, computed with current weights!
for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
float modelScoreDiff = m_featureValueDiffs[i].InnerProduct(currWeights);
// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float diff = m_losses[i] - (modelScoreDiff + m_precision);
if (diff > epsilon) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", past violation: " << diff << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
bool violated = false;
bool addConstraint = true;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float diff = loss - (modelScoreDiff + m_precision);
cerr << "constraint: " << (modelScoreDiff + m_precision) << " >= " << loss << endl;
if (diff > epsilon) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
if (addConstraint) {
featureValueDiffs.push_back(m_featureValueDiffs[i]);
lossMinusModelScoreDiffs.push_back(m_losses[i] - (modelScoreDiff + m_precision));
all_losses.push_back(m_losses[i]);
// cerr << "old constraint: " << (modelScoreDiff + m_precision) << " >= " << m_losses[i] << endl;
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision);
if (addConstraint) {
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
all_losses.push_back(loss);
if (violated) {
++violatedConstraintsBefore;
++pastViolatedConstraints;
oldDistanceFromOptimum += diff;
}
}
}
}
if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of past constraints: " << m_featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of past violated constraints: " << pastViolatedConstraints << endl;
}
// Add new most violated constraint to the list of current constraints
if (m_accumulateMostViolatedConstraints) {
if (max_batch_loss != -1) {
float modelScoreDiff = max_batch_featureValueDiff.InnerProduct(currWeights);
float diff = max_batch_loss - (modelScoreDiff + m_precision);
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
featureValueDiffs.push_back(max_batch_featureValueDiff);
lossMinusModelScoreDiffs.push_back(max_batch_loss - (modelScoreDiff + m_precision));
all_losses.push_back(max_batch_loss);
// cerr << "new constraint: " << (modelScoreDiff + m_precision) << " !>= " << max_batch_loss << endl;
}
}
// Update the list of accumulated most violated constraints
if (max_batch_loss != -1) {
bool updated = false;
for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
float oldScore = m_featureValueDiffs[i].InnerProduct(currWeights);
float newScore = max_batch_featureValueDiff.InnerProduct(currWeights);
if (abs(oldScore-newScore) < epsilon) {
m_losses[i] = max_batch_loss;
updated = true;
break;
if (violated) {
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}
}
}
if (!updated) {
m_featureValueDiffs.push_back(max_batch_featureValueDiff);
m_losses.push_back(max_batch_loss);
}
}
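With the accumulated-constraint bookkeeping above removed, every constraint now comes from a single oracle/hypothesis pair, and the violation test used while collecting constraints reduces to one comparison. A self-contained restatement with illustrative numbers:

#include <iostream>

// For one hypothesis h and its oracle o, the constraint collected above is
//   w . (features(o) - features(h)) + precision  >=  loss,
// where loss is the corresponding BLEU-based loss (losses[i][j] above); it is
// treated as violated when  loss - (modelScoreDiff + precision) > epsilon.
int main() {
	const float epsilon = 0.0001f;   // small tolerance (illustrative; the optimiser defines its own)
	float modelScoreDiff = 0.2f;     // w . (features(o) - features(h)), illustrative
	float precision = 0.0f;          // matches the --precision default above
	float loss = 0.5f;               // BLEU-based loss of the pair, illustrative
	float diff = loss - (modelScoreDiff + precision);
	if (diff > epsilon)
		std::cout << "constraint violated by " << diff << std::endl;   // this pair enters the optimisation
	return 0;
}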
// run optimisation: compute alphas for all given constraints
@@ -284,7 +147,7 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
statusPlus[1] = -1;
statusPlus[2] = -1;
return statusPlus;
}
}
}
// apply learning rate
@@ -302,7 +165,7 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
// scale update by BLEU of oracle
if (oracleBleuScores.size() == 1 && m_max_number_oracles == 1 && m_scale_update) { // scale only if just 1 oracle is used
if (oracleBleuScores.size() == 1 && m_scale_update) {
cerr << "Scaling summed update with log10 oracle bleu score " << log10(oracleBleuScores[0]) << endl;
summedUpdate.MultiplyEquals(log10(oracleBleuScores[0]));
}
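A side note on the scaling kept above: multiplying the summed update by log10 of the oracle BLEU only yields a positive factor when that score is greater than 1 (plausible here, since the BLEU objective can be scaled by the input-length history, see --scale-by-input-length above), whereas for a raw BLEU in (0,1) the factor would be negative and reverse the update. A minimal illustration:

#include <cmath>
#include <iostream>

int main() {
	// The optimiser above multiplies the summed update by log10(oracle BLEU).
	float scaledBleu = 25.0f;   // e.g. a history-scaled BLEU greater than 1 (illustrative)
	float rawBleu = 0.25f;      // a raw BLEU in (0,1)
	std::cout << std::log10(scaledBleu) << std::endl;   // ~1.4: keeps the update direction
	std::cout << std::log10(rawBleu) << std::endl;      // ~-0.6: would flip the update direction
	return 0;
}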
@@ -338,7 +201,6 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
// most violated constraint in batch
ScoreComponentCollection max_batch_featureValueDiff;
float max_batch_loss = -1;
float max_batch_lossMinusModelScoreDiff = -1;
// Make constraints for new hypothesis translations
@@ -348,8 +210,7 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
// iterate over input sentences (1 (online) or more (batch))
for (size_t i = 0; i < featureValuesHope.size(); ++i) {
size_t sentenceId = sentenceIds[i];
size_t sentenceId = sentenceIds[i]; // keep sentenceId for storing more than 1 oracle..
// Pair all hope translations with all fear translations for one input sentence
for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
for (size_t k = 0; k < featureValuesFear[i].size(); ++k) {
@@ -362,20 +223,20 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
}
float loss = bleuScoresHope[i][j] - bleuScoresFear[i][k];
if (m_scale_margin == 1) {
loss *= bleuScoresHope[i][j];
cerr << "Scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl;
}
else if (m_scale_margin == 2) {
loss *= log2(bleuScoresHope[i][j]);
cerr << "Scaling margin with log2 oracle bleu score " << log2(bleuScoresHope[i][j]) << endl;
}
else if (m_scale_margin == 10) {
loss *= log10(bleuScoresHope[i][j]);
cerr << "Scaling margin with log10 oracle bleu score " << log10(bleuScoresHope[i][j]) << endl;
}
if (m_scale_margin == 1) {
loss *= bleuScoresHope[i][j];
cerr << "Scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl;
}
else if (m_scale_margin == 2) {
loss *= log2(bleuScoresHope[i][j]);
cerr << "Scaling margin with log2 oracle bleu score " << log2(bleuScoresHope[i][j]) << endl;
}
else if (m_scale_margin == 10) {
loss *= log10(bleuScoresHope[i][j]);
cerr << "Scaling margin with log10 oracle bleu score " << log10(bleuScoresHope[i][j]) << endl;
}
// check if constraint is violated
// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
@@ -390,18 +251,7 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
}
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_precision);
if (violated) {
if (m_accumulateMostViolatedConstraints || m_pastAndCurrentConstraints) {
// find the most violated constraint per batch
if (lossMinusModelScoreDiff > max_batch_lossMinusModelScoreDiff) {
max_batch_lossMinusModelScoreDiff = lossMinusModelScoreDiff;
max_batch_featureValueDiff = featureValueDiff;
max_batch_loss = loss;
}
}
}
if (addConstraint && !m_accumulateMostViolatedConstraints) {
if (addConstraint) {
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
all_losses.push_back(loss);
@@ -415,84 +265,6 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
}
}
if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of current constraints: " << featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of current violated constraints: " << violatedConstraintsBefore << endl;
}
size_t pastViolatedConstraints = 0;
// Add constraints from past iterations (BEFORE updating that list)
if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
// add all past (most violated) constraints to the list of current constraints, computed with current weights!
for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
float modelScoreDiff = m_featureValueDiffs[i].InnerProduct(currWeights);
// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float diff = m_losses[i] - (modelScoreDiff + m_precision);
if (diff > epsilon) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", past violation: " << diff << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
if (addConstraint) {
featureValueDiffs.push_back(m_featureValueDiffs[i]);
lossMinusModelScoreDiffs.push_back(m_losses[i] - (modelScoreDiff + m_precision));
all_losses.push_back(m_losses[i]);
// cerr << "old constraint: " << (modelScoreDiff + m_precision) << " >= " << m_losses[i] << endl;
if (violated) {
++violatedConstraintsBefore;
++pastViolatedConstraints;
oldDistanceFromOptimum += diff;
}
}
}
}
if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of past constraints: " << m_featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of past violated constraints: " << pastViolatedConstraints << endl;
}
// Add new most violated constraint to the list of current constraints
if (m_accumulateMostViolatedConstraints) {
if (max_batch_loss != -1) {
float modelScoreDiff = max_batch_featureValueDiff.InnerProduct(currWeights);
float diff = max_batch_loss - (modelScoreDiff + m_precision);
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
featureValueDiffs.push_back(max_batch_featureValueDiff);
lossMinusModelScoreDiffs.push_back(max_batch_loss - (modelScoreDiff + m_precision));
all_losses.push_back(max_batch_loss);
// cerr << "new constraint: " << (modelScoreDiff + m_precision) << " !>= " << max_batch_loss << endl;
}
}
// Update the list of accumulated most violated constraints
if (max_batch_loss != -1) {
bool updated = false;
for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
float oldScore = m_featureValueDiffs[i].InnerProduct(currWeights);
float newScore = max_batch_featureValueDiff.InnerProduct(currWeights);
if (abs(oldScore-newScore) < epsilon) {
m_losses[i] = max_batch_loss;
updated = true;
break;
}
}
if (!updated) {
m_featureValueDiffs.push_back(max_batch_featureValueDiff);
m_losses.push_back(max_batch_loss);
}
}
// run optimisation: compute alphas for all given constraints
vector<float> alphas;
ScoreComponentCollection summedUpdate;
@@ -515,14 +287,14 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
// scale update by BLEU of hope translation (only two cases defined at the moment)
if (featureValuesHope.size() == 1 && m_scale_update) { // only defined for batch size 1
if (featureValuesHope[0].size() == 1) {
cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][0]) << endl; // only 1 oracle
update.MultiplyEquals(log10(bleuScoresHope[0][0]));
} else if (featureValuesFear[0].size() == 1) {
cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][k]) << endl; // k oracles
update.MultiplyEquals(log10(bleuScoresHope[0][k]));
}
if (featureValuesHope[0].size() == 1) {
cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][0]) << endl; // only 1 oracle
update.MultiplyEquals(log10(bleuScoresHope[0][0]));
} else if (featureValuesFear[0].size() == 1) {
cerr << "Scaling update with log10 oracle bleu score " << log10(bleuScoresHope[0][k]) << endl; // k oracles
update.MultiplyEquals(log10(bleuScoresHope[0][k]));
}
}
// sum up update
summedUpdate.PlusEquals(update);
@@ -593,7 +365,7 @@ vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection
}
vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& currWeights,
ScoreComponentCollection& featureValuesHope,
ScoreComponentCollection& featureValuesHope,
ScoreComponentCollection& featureValuesFear,
float bleuScoreHope,
float bleuScoreFear,


@@ -63,17 +63,12 @@ namespace Mira {
MiraOptimiser() :
Optimiser() { }
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize, float precision) :
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, bool scale_update, float precision) :
Optimiser(),
m_onlyViolatedConstraints(onlyViolatedConstraints),
m_slack(slack),
m_scale_margin(scale_margin),
m_scale_update(scale_update),
m_max_number_oracles(maxNumberOracles),
m_accumulateMostViolatedConstraints(accumulateMostViolatedConstraints),
m_pastAndCurrentConstraints(pastAndCurrentConstraints),
m_oracles(exampleSize),
m_bleu_of_oracles(exampleSize),
m_precision(precision) { }
std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
@@ -125,21 +120,6 @@ namespace Mira {
size_t m_scale_margin;
// keep a list of oracle translations over epochs
std::vector < std::vector< Moses::ScoreComponentCollection> > m_oracles;
std::vector < std::vector< float> > m_bleu_of_oracles;
size_t m_max_number_oracles;
// accumulate most violated constraints for every example
std::vector< Moses::ScoreComponentCollection> m_featureValueDiffs;
std::vector< float> m_losses;
bool m_accumulateMostViolatedConstraints;
bool m_pastAndCurrentConstraints;
float m_precision;
// scale update with log 10 of oracle BLEU score