mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-26 13:23:25 +03:00)

commit 1964eaf98a, parent eaada140a4

refactor handling of accumulated constraints, constraint checking, introduce burn-in for bleu history, sentence-level bleu, bleu score weight

git-svn-id: http://svn.statmt.org/repository/mira@3882 cc96ff50-19ce-11e0-b349-13d7f0bd23df
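Note: the recurring arithmetic in this commit is a two-level BLEU weighting. The decoder is run with weight-bl = bleuObjectiveWeight * bleuScoreWeight, and the pure model score of a hypothesis is recovered by subtracting that same product times its BLEU. A minimal stand-alone sketch of the arithmetic (illustrative values only; totalScore and bleuScore stand in for values read off a decoded TrellisPath):

    #include <iostream>

    int main() {
        // Illustrative values only; in the diff these come off a decoded TrellisPath.
        float totalScore = -104.2f;        // decoder score, BLEU feature included
        float bleuScore = 0.31f;           // (approximate) sentence BLEU of the hypothesis
        float bleuObjectiveWeight = 1.0f;  // weight of BLEU in the MIRA objective
        float bleuScoreWeight = 2.0f;      // new multiplier on top of the objective weight

        // The decoder runs with weight-bl = bleuObjectiveWeight * bleuScoreWeight,
        // so the pure model score is recovered by subtracting the same product:
        float scoreWithoutBleu = totalScore - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);

        std::cout << "score w/o bleu: " << scoreWithoutBleu << std::endl;
        return 0;
    }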
@@ -68,7 +68,7 @@ namespace Mira {
 		delete[] mosesargv;
 	}
 
-	MosesDecoder::MosesDecoder(const vector<vector<string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
+	MosesDecoder::MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing)
 		: m_manager(NULL) {
 		// force initialisation of the phrase dictionary (TODO: what for?)
 		const StaticData &staticData = StaticData::Instance();
@@ -84,7 +84,6 @@ namespace Mira {
 		// Add the bleu feature
 		m_bleuScoreFeature = new BleuScoreFeature(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
 		(const_cast<TranslationSystem&>(system)).AddFeatureFunction(m_bleuScoreFeature);
-		m_bleuScoreFeature->LoadReferences(refs);
 	}
 
 	void MosesDecoder::cleanup() {
@@ -113,7 +112,7 @@ namespace Mira {
 
 		// set the weight for the bleu feature
 		ostringstream bleuWeightStr;
-		bleuWeightStr << bleuObjectiveWeight;
+		bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
 		PARAM_VEC bleuWeight(1,bleuWeightStr.str());
 
 		staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
@@ -137,7 +136,7 @@ namespace Mira {
 			bleuScores.push_back(bleuScore);
 
 			//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
-			float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
+			float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
 			cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
 
 			Phrase bestPhrase = path.GetTargetPhrase();
@@ -179,6 +178,7 @@ namespace Mira {
 	vector<float> MosesDecoder::getBleuAndScore(const std::string& source,
 			size_t sentenceid,
 			float bleuObjectiveWeight,
+			float bleuScoreWeight,
 			bool distinct)
 	{
 		StaticData &staticData = StaticData::InstanceNonConst();
@@ -191,7 +191,7 @@ namespace Mira {
 
 		// set the weight for the bleu feature
 		ostringstream bleuWeightStr;
-		bleuWeightStr << bleuObjectiveWeight;
+		bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
 		PARAM_VEC bleuWeight(1,bleuWeightStr.str());
 
 		staticData.GetParameter()->OverwriteParam("weight-bl", bleuWeight);
@@ -211,7 +211,7 @@ namespace Mira {
 		vector<float> bleuAndScore;
 		const Moses::TrellisPath &path = **iter;
 		float bleuScore = getBleuScore(path.GetScoreBreakdown());
-		float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
+		float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
 		bleuAndScore.push_back(bleuScore);
 		bleuAndScore.push_back(scoreWithoutBleu);
 		return bleuAndScore;
@@ -246,6 +246,14 @@ namespace Mira {
 		m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
 	}
 
+	void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
+		m_bleuScoreFeature->LoadReferences(refs);
+	}
+
+	void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
+		m_bleuScoreFeature->PrintHistory(out);
+	}
+
 	vector<float> MosesDecoder::calculateBleuOfCorpus(const vector< vector< const Word*> >& words, vector<size_t>& ref_ids, size_t epoch, size_t rank) {
 		vector<float> bleu = m_bleuScoreFeature->CalculateBleuOfCorpus(words, ref_ids);
 		if (bleu.size() > 0) {
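Note: both decoding entry points push the combined weight into the decoder through Moses's string-typed parameter table under "weight-bl". A stand-alone sketch of just that serialisation step, using std::vector<std::string> as a stand-in for PARAM_VEC (which, as far as I can tell, is a typedef for exactly that):

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
        float bleuObjectiveWeight = 1.0f;
        float bleuScoreWeight = 2.0f;

        // Serialise the combined weight to a string, as the diff does before
        // calling StaticData's OverwriteParam("weight-bl", ...).
        std::ostringstream bleuWeightStr;
        bleuWeightStr << (bleuObjectiveWeight * bleuScoreWeight);
        std::vector<std::string> bleuWeight(1, bleuWeightStr.str());  // stand-in for PARAM_VEC

        std::cout << "weight-bl = " << bleuWeight[0] << std::endl;  // prints "weight-bl = 2"
        return 0;
    }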
@@ -50,7 +50,7 @@ void initMoses(const std::string& inifile, int debuglevel, int argc, std::vecto
  **/
 class MosesDecoder {
 	public:
-		MosesDecoder(const std::vector<std::vector<std::string> >& refs, bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
+		MosesDecoder(bool useScaledReference, bool scaleByInputLength, float BPfactor, float historySmoothing);
 
 		//returns the best sentence
 		std::vector<const Moses::Word*> getNBest(const std::string& source,
@@ -66,10 +66,13 @@ class MosesDecoder {
 		std::vector<float> getBleuAndScore(const std::string& source,
 				size_t sentenceid,
 				float bleuObjectiveWeight,
+				float bleuScoreWeight,
 				bool distinct);
 		size_t getCurrentInputLength();
 		void updateHistory(const std::vector<const Moses::Word*>& words);
 		void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
+		void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
+		void printBleuFeatureHistory(std::ostream& out);
 		std::vector<float> calculateBleuOfCorpus(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& ref_ids, size_t epoch, size_t rank);
 		void setBPfactor(float factor);
 		Moses::ScoreComponentCollection getWeights();
mira/Main.cpp (214 changed lines)
@@ -142,7 +142,7 @@ int main(int argc, char** argv) {
 	float marginScaleFactorMin;
 	float min_learning_rate;
 	float min_sentence_update;
-	bool weightedLossFunction;
+	size_t weightedLossFunction;
 	size_t n;
 	size_t batchSize;
 	bool distinctNbest;
@@ -172,8 +172,6 @@ int main(int argc, char** argv) {
 	float decrease_sentence_update;
 	bool devBleu;
 	bool normaliseWeights;
-	bool one_constraint;
-	bool one_per_batch;
 	bool print_feature_values;
 	bool stop_dev_bleu;
 	bool stop_approx_dev_bleu;
@@ -181,14 +179,25 @@ int main(int argc, char** argv) {
 	bool train_linear_classifier;
 	int updates_per_epoch;
 	bool multiplyA;
+	bool historyOf1best;
+	bool burnIn;
+	string burnInInputFile;
+	vector<string> burnInReferenceFiles;
+	bool sentenceLevelBleu;
+	float bleuScoreWeight;
 	po::options_description desc("Allowed options");
-	desc.add_options()("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false),"Accumulate most violated constraint per example")
+	desc.add_options()
+		("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false), "Accumulate most violated constraint per example")
 		("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
 		("adapt-BP-factor", po::value<bool>(&adapt_BPfactor)->default_value(0), "Set factor to 1 when optimal translation length is reached")
 		("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
 		("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for taking the log of feature values")
 		("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is sent to optimiser for weight adjustments")
+		("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the bleu objective weight)")
 		("BP-factor", po::value<float>(&BPfactor)->default_value(1.0), "Increase penalty for short translations")
+		("burn-in", po::value<bool>(&burnIn)->default_value(false), "Do a burn-in of the BLEU history before training")
+		("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
+		("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference files for burn-in phase of BLEU history")
 		("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
 		("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
 		("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
@@ -200,6 +209,7 @@ int main(int argc, char** argv) {
 		("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
 		("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
 		("hildreth", po::value<bool>(&hildreth)->default_value(true), "Use Hildreth's optimisation algorithm")
+		("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
 		("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
 		("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
 		("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
@@ -214,16 +224,15 @@ int main(int argc, char** argv) {
 		("msf", po::value<float>(&marginScaleFactor)->default_value(1.0), "Margin scale factor, regularises the update by scaling the enforced margin")
 		("msf-min", po::value<float>(&marginScaleFactorMin)->default_value(1.0), "Minimum value that margin is scaled by")
 		("msf-step", po::value<float>(&marginScaleFactorStep)->default_value(0), "Decrease margin scale factor iteratively by the value provided")
-	("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
-	("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
+		("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
+		("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
 		("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
-		("one-constraint", po::value<bool>(&one_constraint)->default_value(false), "Forget about hope and fear and consider only the 1best model translation to formulate a constraint")
-		("one-per-batch", po::value<bool>(&one_per_batch)->default_value(false), "Only 1 constraint per batch for params --accumulate-most-violated.. and --past-and-current..")
 		("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
 		("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
 		("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
 		("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
 		("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by a history of the input lengths")
+		("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(false), "Use a sentence-level bleu scoring function")
 		("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
 		("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimizer")
 		("slack-max", po::value<float>(&slack_max)->default_value(0), "Maximum slack used")
@@ -236,7 +245,7 @@ int main(int argc, char** argv) {
 		("updates-per-epoch", po::value<int>(&updates_per_epoch)->default_value(-1), "Accumulate updates and apply them to the weight vector the specified number of times per epoch")
 		("use-scaled-reference", po::value<bool>(&useScaledReference)->default_value(true), "Use scaled reference length for comparing target and reference length of phrases")
 		("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
-		("weighted-loss-function", po::value<bool>(&weightedLossFunction)->default_value(false), "Weight the loss of a hypothesis by its Bleu score")
+		("weighted-loss-function", po::value<size_t>(&weightedLossFunction)->default_value(0), "Weight the loss of a hypothesis by its Bleu score")
 		("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
 
 	po::options_description cmdline_options;
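Note: the refactor above also moves the option table to the chained add_options() style, one option per line. A minimal, self-contained illustration of that boost::program_options pattern, reproducing just two of the real options:

    #include <boost/program_options.hpp>
    #include <iostream>

    namespace po = boost::program_options;

    int main(int argc, char** argv) {
        bool burnIn;
        float bleuScoreWeight;

        po::options_description desc("Allowed options");
        desc.add_options()
            ("bleu-score-weight", po::value<float>(&bleuScoreWeight)->default_value(1.0),
             "Bleu score weight used in the decoder objective function")
            ("burn-in", po::value<bool>(&burnIn)->default_value(false),
             "Do a burn-in of the BLEU history before training");

        po::variables_map vm;
        po::store(po::parse_command_line(argc, argv, desc), vm);
        po::notify(vm);  // writes parsed values into the bound variables

        std::cout << "burn-in=" << burnIn << " bleu-score-weight=" << bleuScoreWeight << "\n";
        return 0;
    }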
@@ -605,14 +614,118 @@ int main(int argc, char** argv) {
 	vector<string> decoder_params;
 	boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
 	initMoses(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
-	MosesDecoder* decoder = new MosesDecoder(referenceSentences,
-			useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
+	MosesDecoder* decoder = new MosesDecoder(useScaledReference, scaleByInputLength, BPfactor, historySmoothing);
 	if (normaliseWeights) {
 		ScoreComponentCollection startWeights = decoder->getWeights();
 		startWeights.L1Normalise();
 		decoder->setWeights(startWeights);
 	}
 
+	if (sentenceLevelBleu) {
+		burnIn = false;
+	}
+
+	if (burnIn) {
+		// load burn-in input and references
+		vector<string> burnInInputSentences;
+		if (!loadSentences(burnInInputFile, burnInInputSentences)) {
+			cerr << "Error: Failed to load burn-in input sentences from " << burnInInputFile << endl;
+			return 1;
+		}
+
+		vector<vector<string> > burnInReferenceSentences(burnInReferenceFiles.size());
+		for (size_t i = 0; i < burnInReferenceFiles.size(); ++i) {
+			if (!loadSentences(burnInReferenceFiles[i], burnInReferenceSentences[i])) {
+				cerr << "Error: Failed to load burn-in reference sentences from "
+						<< burnInReferenceFiles[i] << endl;
+				return 1;
+			}
+			if (burnInReferenceSentences[i].size() != burnInInputSentences.size()) {
+				cerr << "Error: Burn-in input file length (" << burnInInputSentences.size() << ") != ("
+						<< burnInReferenceSentences[i].size() << ") length of burn-in reference file " << i
+						<< endl;
+				return 1;
+			}
+		}
+		decoder->loadReferenceSentences(burnInReferenceSentences);
+
+		vector<size_t> inputLengths;
+		vector<size_t> ref_ids;
+		vector<vector<const Word*> > oracles;
+		vector<vector<const Word*> > oneBests;
+
+		vector<vector<ScoreComponentCollection> > featureValues;
+		vector<vector<float> > bleuScores;
+		vector<ScoreComponentCollection> newFeatureValues;
+		vector<float> newBleuScores;
+		featureValues.push_back(newFeatureValues);
+		bleuScores.push_back(newBleuScores);
+
+		vector<size_t> order;
+		for (size_t i = 0; i < burnInInputSentences.size(); ++i) {
+			order.push_back(i);
+		}
+
+		cerr << "Start burn-in phase for approx. BLEU history.." << endl;
+		if (historyOf1best) {
+			// get 1best translations for the burn-in sentences
+			vector<size_t>::const_iterator sid = order.begin();
+			while (sid != order.end()) {
+				string& input = burnInInputSentences[*sid];
+				vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
+						featureValues[0], bleuScores[0], true,
+						distinctNbest, rank);
+				inputLengths.push_back(decoder->getCurrentInputLength());
+				ref_ids.push_back(*sid);
+				decoder->cleanup();
+				oneBests.push_back(bestModel);
+				++sid;
+			}
+
+			// update history
+			decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, 0);
+
+			// clean up 1best translations after updating history
+			for (size_t i = 0; i < oneBests.size(); ++i) {
+				for (size_t j = 0; j < oneBests[i].size(); ++j) {
+					delete oneBests[i][j];
+				}
+			}
+		}
+		else {
+			// get oracle translations for the burn-in sentences
+			vector<size_t>::const_iterator sid = order.begin();
+			while (sid != order.end()) {
+				string& input = burnInInputSentences[*sid];
+				vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
+						featureValues[0], bleuScores[0], true,
+						distinctNbest, rank);
+				inputLengths.push_back(decoder->getCurrentInputLength());
+				ref_ids.push_back(*sid);
+				decoder->cleanup();
+				oracles.push_back(oracle);
+				++sid;
+			}
+
+			// update history
+			decoder->updateHistory(oracles, inputLengths, ref_ids, rank, 0);
+
+			// clean up oracle translations after updating history
+			for (size_t i = 0; i < oracles.size(); ++i) {
+				for (size_t j = 0; j < oracles[i].size(); ++j) {
+					delete oracles[i][j];
+				}
+			}
+		}
+
+		cerr << "Bleu feature history after burn-in: " << endl;
+		decoder->printBleuFeatureHistory(cerr);
+		decoder->loadReferenceSentences(referenceSentences);
+	}
+	else {
+		decoder->loadReferenceSentences(referenceSentences);
+	}
+
 	// Optionally shuffle the sentences
 	vector<size_t> order;
 	if (rank == 0) {
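Note: burn-in exists to seed the BLEU n-gram history with real translations before any weight updates happen. The actual update rule lives in BleuScoreFeature::UpdateHistory, which this diff does not show; the sketch below is a guess at the usual exponentially smoothed form (cf. m_historySmoothing, default 0.9 above), not a quote of the Moses code:

    #include <cstdio>

    // Hypothetical sketch of an exponentially smoothed BLEU history update.
    // NOT the actual Moses implementation, which is outside this diff.
    struct BleuHistory {
        float matches[4] = {0, 0, 0, 0};  // n-gram match history, n = 1..4
        float counts[4]  = {0, 0, 0, 0};  // n-gram count history

        void update(const float newMatches[4], const float newCounts[4], float smoothing) {
            for (int n = 0; n < 4; ++n) {
                // old statistics decay by `smoothing`; the new sentence's statistics are added
                matches[n] = smoothing * matches[n] + newMatches[n];
                counts[n]  = smoothing * counts[n]  + newCounts[n];
            }
        }
    };

    int main() {
        BleuHistory h;
        float m[4] = {20, 15, 11, 8}, c[4] = {25, 24, 23, 22};
        h.update(m, c, 0.9f);
        std::printf("unigram history: %.1f/%.1f\n", h.matches[0], h.counts[0]);
        return 0;
    }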
@@ -691,8 +804,7 @@ int main(int argc, char** argv) {
 		cerr << "Optimising using Mira" << endl;
 		optimiser = new MiraOptimiser(n, hildreth, marginScaleFactor,
 				onlyViolatedConstraints, slack, weightedLossFunction, maxNumberOracles,
-				accumulateMostViolatedConstraints, pastAndCurrentConstraints, one_per_batch,
-				order.size());
+				accumulateMostViolatedConstraints, pastAndCurrentConstraints, order.size());
 		if (hildreth) {
 			cerr << "Using Hildreth's optimisation algorithm.." << endl;
 		}
@@ -777,6 +889,7 @@ int main(int argc, char** argv) {
 			vector<size_t> oraclePositions;
 			vector<float> oracleBleuScores;
 			vector<vector<const Word*> > oracles;
+			vector<vector<const Word*> > oneBests;
 			vector<ScoreComponentCollection> oracleFeatureValues;
 			vector<size_t> inputLengths;
 			vector<size_t> ref_ids;
@@ -795,52 +908,25 @@ int main(int argc, char** argv) {
 			featureValues.push_back(newFeatureValues);
 			bleuScores.push_back(newBleuScores);
 
-			if (one_constraint) {
-				cerr << "Rank " << rank << ", run decoder to get 1best wrt model score" << endl;
-				vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0,
-						1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
-						distinctNbest, rank);
-				inputLengths.push_back(decoder->getCurrentInputLength());
-				ref_ids.push_back(*sid);
-				all_ref_ids.push_back(*sid);
-				allBestModelScore.push_back(bestModel);
-				decoder->cleanup();
-				cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
-
-				// HOPE
-				cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
-				size_t oraclePos = featureValues[batchPosition].size();
-				oraclePositions.push_back(oraclePos);
-				vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0,
-						1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
-						distinctNbest, rank);
-				decoder->cleanup();
-				oracles.push_back(oracle);
-				cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;
-
-				oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
-				float oracleBleuScore = bleuScores[batchPosition][oraclePos];
-				oracleBleuScores.push_back(oracleBleuScore);
-			}
-			else {
 			// MODEL
 			cerr << "Rank " << rank << ", run decoder to get nbest wrt model score" << endl;
-			vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0,
-					1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
+			vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight,
+					featureValues[batchPosition], bleuScores[batchPosition], true,
 					distinctNbest, rank);
 			inputLengths.push_back(decoder->getCurrentInputLength());
 			ref_ids.push_back(*sid);
 			all_ref_ids.push_back(*sid);
 			allBestModelScore.push_back(bestModel);
 			decoder->cleanup();
+			oneBests.push_back(bestModel);
 			cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
 
 			// HOPE
 			cerr << "Rank " << rank << ", run decoder to get nbest hope translations" << endl;
 			size_t oraclePos = featureValues[batchPosition].size();
 			oraclePositions.push_back(oraclePos);
-			vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0,
-					1.0, featureValues[batchPosition], bleuScores[batchPosition], true,
+			vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight,
+					featureValues[batchPosition], bleuScores[batchPosition], true,
 					distinctNbest, rank);
 			decoder->cleanup();
 			oracles.push_back(oracle);
@@ -853,7 +939,7 @@ int main(int argc, char** argv) {
 			// FEAR
 			cerr << "Rank " << rank << ", run decoder to get nbest fear translations" << endl;
 			size_t fearPos = featureValues[batchPosition].size();
-			vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, 1.0,
+			vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight,
 					featureValues[batchPosition], bleuScores[batchPosition], true,
 					distinctNbest, rank);
 			decoder->cleanup();
@@ -865,7 +951,6 @@ int main(int argc, char** argv) {
 			for (size_t i = 0; i < fear.size(); ++i) {
 				delete fear[i];
 			}
-			}
 
 			cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl;
 			summedApproxBleu += bleuScores[batchPosition][0];
@@ -908,7 +993,7 @@ int main(int argc, char** argv) {
 			vector< vector <float > > bestModelOld_batch;
 			for (size_t i = 0; i < actualBatchSize; ++i) {
 				string& input = inputSentences[*current_sid_start + i];
-				vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
+				vector <float> bestModelOld = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
 				bestModelOld_batch.push_back(bestModelOld);
 				decoder->cleanup();
 			}
@@ -928,16 +1013,9 @@ int main(int argc, char** argv) {
 			cerr << "\nRank " << rank << ", run optimiser:" << endl;
 			ScoreComponentCollection oldWeights(mosesWeights);
 			vector<int> update_status;
-			if (one_constraint) {
-				update_status = optimiser->updateWeightsAnalytically(mosesWeights, featureValues[0][0],
-						losses[0][0], oracleFeatureValues[0], oracleBleuScores[0], ref_ids[0],
-						learning_rate, max_sentence_update, rank, epoch, controlUpdates);
-			}
-			else {
-				update_status = optimiser->updateWeights(mosesWeights, featureValues,
+			update_status = optimiser->updateWeights(mosesWeights, featureValues,
 					losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
 					learning_rate, max_sentence_update, rank, epoch, updates_per_epoch, controlUpdates);
-			}
 
 			if (update_status[0] == 1) {
 				cerr << "Rank " << rank << ", no update for batch" << endl;
@@ -985,7 +1063,7 @@ int main(int argc, char** argv) {
 				vector<float> bestModelNew;
 				for (size_t i = 0; i < actualBatchSize; ++i) {
 					string& input = inputSentences[*current_sid_start + i];
-					bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, distinctNbest);
+					bestModelNew = decoder->getBleuAndScore(input, *current_sid_start + i, 0.0, bleuScoreWeight, distinctNbest);
 					decoder->cleanup();
 					cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model bleu, old: " << bestModelOld_batch[i][0] << ", new: " << bestModelNew[0] << endl;
 					cerr << "Rank " << rank << ", epoch " << epoch << ", 1best model score, old: " << bestModelOld_batch[i][1] << ", new: " << bestModelNew[1] << endl;
@@ -993,11 +1071,21 @@ int main(int argc, char** argv) {
 				}
 			}
 
-			// update history (for approximate document Bleu)
-			for (size_t i = 0; i < oracles.size(); ++i) {
-				cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
-			}
-			decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
+			if (!sentenceLevelBleu) {
+				// update history (for approximate document Bleu)
+				if (historyOf1best) {
+					for (size_t i = 0; i < oneBests.size(); ++i) {
+						cerr << "Rank " << rank << ", 1best length: " << oneBests[i].size() << " ";
+					}
+					decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch);
+				}
+				else {
+					for (size_t i = 0; i < oracles.size(); ++i) {
+						cerr << "Rank " << rank << ", oracle length: " << oracles[i].size() << " ";
+					}
+					decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
+				}
+			}
 
 			// clean up oracle translations after updating history
 			for (size_t i = 0; i < oracles.size(); ++i) {
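Note: this hunk is why burn-in is disabled under --sentence-level-bleu earlier in the file: with a per-sentence BLEU objective each hypothesis is scored against its own reference, so there is no running document history to maintain or seed. In the approximate document BLEU setting, the history is now fed either by the 1best model translations (--history-of-1best) or, as before, by the oracles.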
@@ -1136,6 +1224,10 @@ int main(int argc, char** argv) {
 			}// end dumping
 		} // end of shard loop, end of this epoch
 
+
+		cerr << "Bleu feature history after epoch " << epoch << endl;
+		decoder->printBleuFeatureHistory(cerr);
+
 		size_t sumUpdates;
 		size_t *sendbuf_uint, *recvbuf_uint;
 		sendbuf_uint = (size_t *) malloc(sizeof(size_t));
@@ -88,7 +88,7 @@ namespace Mira {
 		MiraOptimiser() :
 			Optimiser() { }
 
-		MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, bool weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, bool one_per_batch, size_t exampleSize) :
+		MiraOptimiser(size_t n, bool hildreth, float marginScaleFactor, bool onlyViolatedConstraints, float slack, size_t weightedLossFunction, size_t maxNumberOracles, bool accumulateMostViolatedConstraints, bool pastAndCurrentConstraints, size_t exampleSize) :
 			Optimiser(),
 			m_n(n),
 			m_hildreth(hildreth),
@@ -99,7 +99,6 @@ namespace Mira {
 			m_max_number_oracles(maxNumberOracles),
 			m_accumulateMostViolatedConstraints(accumulateMostViolatedConstraints),
 			m_pastAndCurrentConstraints(pastAndCurrentConstraints),
-			m_one_per_batch(one_per_batch),
 			m_oracles(exampleSize),
 			m_bleu_of_oracles(exampleSize) { }
 
@@ -166,7 +165,7 @@ namespace Mira {
 		// regularise Hildreth updates
 		float m_slack;
 
-		bool m_weightedLossFunction;
+		size_t m_weightedLossFunction;
 
 		// index of oracle translation in hypothesis matrix
 		std::vector<size_t> m_oracleIndices;
@@ -180,14 +179,13 @@ namespace Mira {
 
 		// accumulate most violated constraints for every example
 		std::vector< Moses::ScoreComponentCollection> m_featureValueDiffs;
 		std::vector< float> m_lossMarginDistances;
 		std::vector< float> m_losses;
 
 
 		bool m_accumulateMostViolatedConstraints;
 
 		bool m_pastAndCurrentConstraints;
 
-		bool m_one_per_batch;
-
 		Moses::ScoreComponentCollection m_accumulatedUpdates;
 };
 }
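Note: weightedLossFunction widens from bool to size_t here, turning an on/off switch into a mode selector. How the optimiser interprets the modes is outside this diff; the sketch below is a purely hypothetical illustration of what such a selector could look like, not the MiraOptimiser code:

    #include <cmath>
    #include <cstdio>

    // Hypothetical weighted-loss modes: 0 = plain loss, other values pick
    // different BLEU-based weightings of the hypothesis loss.
    float weightLoss(float loss, float hypothesisBleu, size_t mode) {
        switch (mode) {
        case 1:  return loss * hypothesisBleu;                // scale loss linearly by BLEU
        case 2:  return loss * std::log(1 + hypothesisBleu);  // damped scaling
        default: return loss;                                 // 0: unweighted
        }
    }

    int main() {
        std::printf("%.3f\n", weightLoss(0.5f, 0.3f, 1));
        return 0;
    }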
@@ -95,8 +95,20 @@ BleuScoreFeature::BleuScoreFeature(bool useScaledReference, bool scaleByInputLen
 	m_BP_factor(BPfactor),
 	m_historySmoothing(historySmoothing) {}
 
+void BleuScoreFeature::PrintHistory(std::ostream& out) const {
+	out << "source length history=" << m_source_length_history << endl;
+	out << "target length history=" << m_target_length_history << endl;
+	out << "ref length history=" << m_ref_length_history << endl;
+
+	for (size_t i = 0; i < BleuScoreState::bleu_order; ++i) {
+		out << "match history/count history (" << i << "):" << m_match_history[i] << "/" << m_count_history[i] << endl;
+	}
+}
+
 void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
 {
 	m_refs.clear();
 	FactorCollection& fc = FactorCollection::Instance();
 	for (size_t file_id = 0; file_id < refs.size(); file_id++) {
 		for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
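Note: PrintHistory dumps exactly the sufficient statistics BLEU needs — smoothed lengths plus a match/count pair per n-gram order (bleu_order is 4). For orientation, here is the textbook way such statistics combine into a BLEU score (standard formula, not code from this commit; the BPfactor scaling above is ignored):

    #include <cmath>
    #include <cstdio>

    // Standard BLEU from accumulated statistics: geometric mean of the four
    // n-gram precisions times a brevity penalty.
    float bleuFromHistory(const float matches[4], const float counts[4],
                          float targetLength, float refLength) {
        float logPrecision = 0;
        for (int n = 0; n < 4; ++n)
            logPrecision += std::log(matches[n] / counts[n]) / 4.0f;
        float bp = (targetLength >= refLength)
                       ? 1.0f
                       : std::exp(1.0f - refLength / targetLength);  // brevity penalty
        return bp * std::exp(logPrecision);
    }

    int main() {
        float m[4] = {18, 12, 8, 5}, c[4] = {25, 24, 23, 22};
        std::printf("BLEU = %.4f\n", bleuFromHistory(m, c, 25, 27));
        return 0;
    }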
@@ -62,6 +62,7 @@ public:
 		return 1;
 	}
 
+	void PrintHistory(std::ostream& out) const;
 	void LoadReferences(const std::vector< std::vector< std::string > > &);
 	void SetCurrentSourceLength(size_t);
 	void SetCurrentReference(size_t);
@@ -178,8 +178,11 @@ namespace Moses {
 		/* if (i->first != DEFAULT_NAME && i->second != 0.0) {
 			out << i->first << "=" << value << ", ";
 		}*/
-		if (i->first != DEFAULT_NAME) {
+		/* if (i->first != DEFAULT_NAME) {
+			out << i->first << "=" << value << ", ";
+		}*/
+		if (i->first != DEFAULT_NAME) {
 			out << value << ", ";
 		}
 	}
 	out << "}";
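Note: with this change the feature-vector printer emits only the comma-separated values inside braces, e.g. {0.217, -1.352, 0.874} (values invented for illustration), dropping the name= prefixes; the earlier name-printing variants are retained as comments above it.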