diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index 3c7e381c4..8a04522b9 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -103,7 +103,7 @@ namespace Mira {
     staticData.ReLoadBleuScoreFeatureParameter(bleuObjectiveWeight*bleuScoreWeight);
 
     m_bleuScoreFeature->SetCurrentSourceLength((*m_sentence).GetSize());
-    m_bleuScoreFeature->SetCurrentReference(sentenceid);
+    m_bleuScoreFeature->SetCurrentShortestReference(sentenceid);
 
     //run the decoder
     m_manager = new Moses::Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system); 
@@ -192,12 +192,12 @@ namespace Mira {
   	m_bleuScoreFeature->PrintHistory(out);
   }
 
-  void MosesDecoder::printReferenceLength(const vector<size_t>& ref_ids) {
+/*  void MosesDecoder::printReferenceLength(const vector<size_t>& ref_ids) {
   	m_bleuScoreFeature->PrintReferenceLength(ref_ids);
-  }
+  }*/
 
-  size_t MosesDecoder::getReferenceLength(size_t ref_id) {
-  	return m_bleuScoreFeature->GetReferenceLength(ref_id);
+  size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength) { // delegates to the BLEU score feature
+  	return m_bleuScoreFeature->GetClosestReferenceLength(ref_id, hypoLength);
   }
 
   void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
diff --git a/mira/Decoder.h b/mira/Decoder.h
index 067f1cdcb..52ce46af6 100644
--- a/mira/Decoder.h
+++ b/mira/Decoder.h
@@ -66,8 +66,8 @@ class MosesDecoder {
     void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
 //    void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
     void printBleuFeatureHistory(std::ostream& out);
-    void printReferenceLength(const std::vector<size_t>& ref_ids);
-    size_t getReferenceLength(size_t ref_id);
+//    void printReferenceLength(const std::vector<size_t>& ref_ids);
+    size_t getClosestReferenceLength(size_t ref_id, int hypoLength);
     void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
     		bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
   		  float scaleByX, float historySmoothing, size_t scheme, float relax_BP);
diff --git a/mira/Main.cpp b/mira/Main.cpp
index a96e02ac4..836bf6bb0 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -352,6 +352,8 @@ int main(int argc, char** argv) {
 		cerr << "Error: Need to select an one of parameters --hope-fear/--model-hope-fear for mira update." << endl;
 		return 1;
 	}
+	if (historyOf1best || historyOfOracles)
+		sentenceLevelBleu = false;
 	if (!sentenceLevelBleu) {
 		if (!historyOf1best && !historyOfOracles) {
 			historyOf1best = true;
@@ -495,7 +497,8 @@ int main(int argc, char** argv) {
 					}
 				}
 
-				size_t reference_length = decoder->getReferenceLength(*sid);
+				size_t ref_length;
+				float avg_ref_length;
 				if (hope_fear || perceptron_update) {
 					// HOPE
 					cerr << "Rank " << rank << ", epoch " << epoch << ", " << hope_n << "best hope translations" << endl;
@@ -504,7 +507,9 @@ int main(int argc, char** argv) {
 							distinctNbest, rank, epoch);
 					size_t current_input_length = decoder->getCurrentInputLength();
 					decoder->cleanup();
-					float hope_length_ratio = (float)oracle.size()/reference_length;
+					ref_length = decoder->getClosestReferenceLength(*sid, oracle.size());
+					avg_ref_length = ref_length;
+					float hope_length_ratio = (float)oracle.size()/ref_length;
 					cerr << ", l-ratio hope: " << hope_length_ratio << endl;
 
 					vector<const Word*> bestModel;
@@ -516,8 +521,9 @@ int main(int argc, char** argv) {
 								distinctNbest, rank, epoch);
 						decoder->cleanup();
 						cerr << endl;
+						ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size());
 						dev_hypothesis_length += bestModel.size();
-						dev_reference_length += reference_length;
+						dev_reference_length += ref_length;
 					}
 
 					// FEAR
@@ -526,7 +532,10 @@ int main(int argc, char** argv) {
 							featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true,
 							distinctNbest, rank, epoch);
 					decoder->cleanup();
-					float fear_length_ratio = (float)fear.size()/reference_length;
+					ref_length = decoder->getClosestReferenceLength(*sid, fear.size());
+					avg_ref_length += ref_length;
+					avg_ref_length /= 2;
+					float fear_length_ratio = (float)fear.size()/ref_length;
 					cerr << ", l-ratio fear: " << fear_length_ratio << endl;
 					for (size_t i = 0; i < fear.size(); ++i) {
 						delete fear[i];
@@ -541,7 +550,7 @@ int main(int argc, char** argv) {
 					bool skip = false;
 					if (max_length_dev_reference != -1 && (length_diff_hope > max_length_dev_reference || length_diff_fear > max_length_dev_reference))
 						skip = true;
-					if (max_length_dev_hypos != -1 && (length_diff_hope_fear > reference_length * max_length_dev_hypos))
+					if (max_length_dev_hypos != -1 && (length_diff_hope_fear > avg_ref_length * max_length_dev_hypos))
 						skip = true;
 					if (skip) {
 						cerr << "Rank " << rank << ", epoch " << epoch << ", skip example (" << hope_length_ratio << ", " << fear_length_ratio << ", " << length_diff_hope_fear << ").. " << endl;
@@ -579,7 +588,8 @@ int main(int argc, char** argv) {
 					ref_ids.push_back(*sid);
 					decoder->cleanup();
 					oracles.push_back(oracle);
-					float hope_length_ratio = (float)oracle.size()/reference_length;
+					ref_length = decoder->getClosestReferenceLength(*sid, oracle.size());
+					float hope_length_ratio = (float)oracle.size()/ref_length;
 					cerr << ", l-ratio hope: " << hope_length_ratio << endl;
 
 					oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
@@ -592,11 +602,12 @@ int main(int argc, char** argv) {
 							distinctNbest, rank, epoch);
 					decoder->cleanup();
 					oneBests.push_back(bestModel);
-					float model_length_ratio = (float)bestModel.size()/reference_length;
+					ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size());
+					float model_length_ratio = (float)bestModel.size()/ref_length;
 					cerr << ", l-ratio model: " << model_length_ratio << endl;
 					if (stabiliseLength) {
 						dev_hypothesis_length += bestModel.size();
-						dev_reference_length += reference_length;
+						dev_reference_length += ref_length;
 					}
 
 					// FEAR
@@ -606,7 +617,8 @@ int main(int argc, char** argv) {
 							featureValues[batchPosition], bleuScores[batchPosition], true,
 							distinctNbest, rank, epoch);
 					decoder->cleanup();
-					float fear_length_ratio = (float)fear.size()/reference_length;
+					ref_length = decoder->getClosestReferenceLength(*sid, fear.size());
+					float fear_length_ratio = (float)fear.size()/ref_length;
 					cerr << ", l-ratio fear: " << fear_length_ratio << endl;
 					for (size_t i = 0; i < fear.size(); ++i) {
 						delete fear[i];
diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp
index c315f0a84..45993b6ff 100644
--- a/moses/src/BleuScoreFeature.cpp
+++ b/moses/src/BleuScoreFeature.cpp
@@ -98,37 +98,47 @@ void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRe
 void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
 {
 	m_refs.clear();
-    FactorCollection& fc = FactorCollection::Instance();
-    cerr << "Number of reference files: " << refs.size() << endl; 
-    for (size_t file_id = 0; file_id < refs.size(); file_id++) {
-      for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
-          const string& ref = refs[file_id][ref_id];
-          vector<string> refTokens  = Tokenize(ref);
-          m_refs[ref_id] = pair<size_t,NGrams>();
-           pair<size_t,NGrams>& ref_pair = m_refs[ref_id];
-          ref_pair.first = refTokens.size();
-          for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
-              for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
-                  Phrase ngram(Output,1);
-                  for (size_t s_idx = end_idx - order; s_idx < end_idx; s_idx++) {
-                      const Factor* f = fc.AddFactor(Output, 0, refTokens[s_idx]);
-                      Word w;
-                      w.SetFactor(0, f);
-                      ngram.AddWord(w);
-                  }
-                  ref_pair.second[ngram] += 1;
-              }
-          }
-      }
-    }
+	// Fill m_refs (emptied above): per sentence id, the token count of each
+	// reference translation (appended in reference-file order) and a table of
+	// counts for every n-gram of order 1..BleuScoreState::bleu_order.
+	// NOTE(review): n-gram counts are summed over all reference files, not
+	// clipped to the per-reference maximum - confirm this is intended.
+	FactorCollection& factors = FactorCollection::Instance();
+	for (size_t fileIdx = 0; fileIdx < refs.size(); ++fileIdx) {
+		const vector<string>& fileRefs = refs[fileIdx];
+		for (size_t sentIdx = 0; sentIdx < fileRefs.size(); ++sentIdx) {
+			vector<string> tokens = Tokenize(fileRefs[sentIdx]);
+			// operator[] creates the (empty) entry when a sentence id is first seen
+			pair<vector<size_t>,NGrams>& entry = m_refs[sentIdx];
+			entry.first.push_back(tokens.size());
+			for (size_t n = 1; n <= BleuScoreState::bleu_order; ++n) {
+				for (size_t stop = n; stop <= tokens.size(); ++stop) {
+					Phrase ngram(Output,1);
+					for (size_t pos = stop - n; pos < stop; ++pos) {
+						const Factor* factor = factors.AddFactor(Output, 0, tokens[pos]);
+						Word w;
+						w.SetFactor(0, factor);
+						ngram.AddWord(w);
+					}
+					++entry.second[ngram];
+				}
+			}
+		}
+	}
 }
 
 void BleuScoreFeature::SetCurrentSourceLength(size_t source_length) {
     m_cur_source_length = source_length;
 }
 
-void BleuScoreFeature::SetCurrentReference(size_t ref_id) {
-    m_cur_ref_length = m_refs[ref_id].first;
+void BleuScoreFeature::SetCurrentShortestReference(size_t ref_id) {
+    // make the shortest reference length of ref_id current (-1 if it has no references)
+    const vector<size_t>& refLengths = m_refs[ref_id].first;
+    int shortestRef = -1;
+    for (size_t r = 0; r < refLengths.size(); ++r)
+        if (shortestRef == -1 || refLengths[r] < (size_t)shortestRef)
+            shortestRef = refLengths[r];
+    m_cur_ref_length = shortestRef;
     m_cur_ref_ngrams = m_refs[ref_id].second;
 }
 
@@ -163,15 +173,16 @@ void BleuScoreFeature::UpdateHistory(const vector< const Word* >& hypo) {
  * Update history with a batch of translations
  */
 void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypos, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch) {
-	for (size_t batchPosition = 0; batchPosition < hypos.size(); ++batchPosition){
-	    Phrase phrase(Output, hypos[batchPosition]);
+	for (size_t ref_id = 0; ref_id < hypos.size(); ++ref_id){
+	    Phrase phrase(Output, hypos[ref_id]);
 	    std::vector< size_t > ngram_counts(BleuScoreState::bleu_order);
 	    std::vector< size_t > ngram_matches(BleuScoreState::bleu_order);
 
 	    // set current source and reference information for each oracle in the batch
-	    size_t cur_source_length = sourceLengths[batchPosition];
-	    size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first;
-	    NGrams cur_ref_ngrams = m_refs[ref_ids[batchPosition]].second;
+	    size_t cur_source_length = sourceLengths[ref_id];
+	    size_t hypo_length = hypos[ref_id].size();
+	    size_t cur_ref_length = GetClosestReferenceLength(ref_ids[ref_id], hypo_length);
+	    NGrams cur_ref_ngrams = m_refs[ref_ids[ref_id]].second;
 	    cerr << "reference length: " << cur_ref_length << endl;
 
 	    // compute vector c(e;{r_k}):
@@ -184,7 +195,7 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
 	        m_match_history[i] += ngram_matches[i];
 
 	        // do this for last position in batch
-	        if (batchPosition == hypos.size() - 1) {
+	        if (ref_id == hypos.size() - 1) {
 	        	m_count_history[i] *= m_historySmoothing;
 	        	m_match_history[i] *= m_historySmoothing;
 	        }
@@ -192,11 +203,11 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
 
 	    // update counts for reference and target length
 	    m_source_length_history += cur_source_length;
-	    m_target_length_history += hypos[batchPosition].size();
+	    m_target_length_history += hypos[ref_id].size();
 	    m_ref_length_history += cur_ref_length;
 
 	    // do this for last position in batch
-	    if (batchPosition == hypos.size() - 1) {
+	    if (ref_id == hypos.size() - 1) {
 	    	cerr << "Rank " << rank << ", epoch " << epoch << " ,source length history: " << m_source_length_history << " --> " << m_source_length_history * m_historySmoothing << endl;
 	    	cerr << "Rank " << rank << ", epoch " << epoch << " ,target length history: " << m_target_length_history << " --> " << m_target_length_history * m_historySmoothing << endl;
 	    	m_source_length_history *= m_historySmoothing;
@@ -209,15 +220,24 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
 /*
  * Print batch of reference translations
  */
-void BleuScoreFeature::PrintReferenceLength(const vector<size_t>& ref_ids) {
-	for (size_t batchPosition = 0; batchPosition < ref_ids.size(); ++batchPosition){
-	    size_t cur_ref_length = m_refs[ref_ids[batchPosition]].first;
+/*void BleuScoreFeature::PrintReferenceLength(const vector<size_t>& ref_ids) {
+	for (size_t ref_id = 0; ref_id < ref_ids.size(); ++ref_id){
+	    size_t cur_ref_length = (m_refs[ref_ids[ref_id]].first)[0]; // TODO!!
 	    cerr << "reference length: " << cur_ref_length << endl;
 	}
-}
+}*/
 
-size_t BleuScoreFeature::GetReferenceLength(size_t ref_id) {
-	size_t cur_ref_length = m_refs[ref_id].first;
+size_t BleuScoreFeature::GetClosestReferenceLength(size_t ref_id, int hypoLength) {
+	// reference length of ref_id nearest to hypoLength; the first one found wins ties
+	const vector<size_t>& refLengths = m_refs[ref_id].first;
+	int bestLength = -1, bestDist = -1;
+	for (size_t r = 0; r < refLengths.size(); ++r) {
+		const int dist = abs(hypoLength - (int)refLengths[r]);
+		if (bestLength != -1 && dist >= bestDist) continue;
+		bestLength = refLengths[r];
+		bestDist = dist;
+	}
+	size_t cur_ref_length = bestLength;
 	return cur_ref_length;
 }
 
diff --git a/moses/src/BleuScoreFeature.h b/moses/src/BleuScoreFeature.h
index 038c9692d..d572a7c37 100644
--- a/moses/src/BleuScoreFeature.h
+++ b/moses/src/BleuScoreFeature.h
@@ -44,7 +44,7 @@ class BleuScoreFeature : public StatefulFeatureFunction {
 public:
 
   typedef boost::unordered_map< Phrase, size_t > NGrams;
-  typedef boost::unordered_map<size_t, std::pair<size_t,NGrams> > RefCounts;
+  typedef boost::unordered_map<size_t, std::pair<std::vector<size_t>,NGrams> > RefCounts;
   typedef boost::unordered_map<size_t, NGrams> Matches;
 
 	BleuScoreFeature():
@@ -75,11 +75,11 @@ public:
     void PrintHistory(std::ostream& out) const;
     void LoadReferences(const std::vector< std::vector< std::string > > &);
     void SetCurrentSourceLength(size_t);
-    void SetCurrentReference(size_t);
+    void SetCurrentShortestReference(size_t);
     void UpdateHistory(const std::vector< const Word* >&);
     void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
-    void PrintReferenceLength(const std::vector<size_t>& ref_ids);
-    size_t GetReferenceLength(size_t ref_id);
+//    void PrintReferenceLength(const std::vector<size_t>& ref_ids);
+    size_t GetClosestReferenceLength(size_t ref_id, int hypoLength);
     void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
     		bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
   		  float scaleByX, float historySmoothing, size_t scheme, float relaxBP);