diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index 57acdff89..0e21cbed1 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -184,9 +184,9 @@ namespace Mira {
     m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
   }
 
-  void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
+/*  void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
     m_bleuScoreFeature->LoadReferences(refs);
-  }
+  }*/
 
   void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
     m_bleuScoreFeature->PrintHistory(out);
@@ -200,9 +200,11 @@ namespace Mira {
     return m_bleuScoreFeature->GetReferenceLength(ref_id);
   }
 
-  void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+  void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+      bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
       float scaleByX, float historySmoothing, size_t scheme, float relax_BP) {
-    m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength, scaleByTargetLength,
+    m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength,
+        scaleByTargetLengthLinear, scaleByTargetLengthTrend,
         scaleByX, historySmoothing, scheme, relax_BP);
   }
 }
diff --git a/mira/Decoder.h b/mira/Decoder.h
index fc1e82b8c..067f1cdcb 100644
--- a/mira/Decoder.h
+++ b/mira/Decoder.h
@@ -64,11 +64,12 @@ class MosesDecoder {
   size_t getCurrentInputLength();
   void updateHistory(const std::vector<std::string>& words);
   void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
-  void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
+//  void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
   void printBleuFeatureHistory(std::ostream& out);
   void printReferenceLength(const std::vector<size_t>& ref_ids);
   size_t getReferenceLength(size_t ref_id);
-  void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+  void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+      bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
       float scaleByX, float historySmoothing, size_t scheme, float relax_BP);
   Moses::ScoreComponentCollection getWeights();
   void setWeights(const Moses::ScoreComponentCollection& weights);
diff --git a/mira/Main.cpp b/mira/Main.cpp
index bc38466d1..a96e02ac4 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -82,7 +82,8 @@ int main(int argc, char** argv) {
   float historySmoothing;
   bool scaleByInputLength;
   bool scaleByReferenceLength;
-  bool scaleByTargetLength;
+  bool scaleByTargetLengthLinear;
+  bool scaleByTargetLengthTrend;
   bool scaleByAvgLength;
   float scaleByX;
   float slack;
@@ -119,6 +120,8 @@ int main(int argc, char** argv) {
   float max_length_dev_hypos;
   float max_length_dev_reference;
   float relax_BP;
+  bool stabiliseLength;
+  bool delayUpdates;
   po::options_description desc("Allowed options");
   desc.add_options()
     ("accumulate-weights", po::value(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
@@ -133,6 +136,7 @@ int main(int argc, char** argv) {
     ("core-weights", po::value(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
     ("decoder-settings", po::value(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
     ("decr-learning-rate", po::value(&decrease_learning_rate)->default_value(0), "Decrease learning rate by the given value after every epoch")
+    ("delay-updates", po::value(&delayUpdates)->default_value(false), "Delay all updates until the end of an epoch")
     ("distinct-nbest", po::value(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
     ("epochs,e", po::value(&epochs)->default_value(10), "Number of epochs")
    ("fear-n", po::value(&fear_n)->default_value(-1), "Number of fear translations used")
@@ -164,7 +168,8 @@ int main(int argc, char** argv) {
     ("relax-BP", po::value(&relax_BP)->default_value(1), "Relax the BP by setting this value between 0 and 1")
     ("scale-by-input-length", po::value(&scaleByInputLength)->default_value(true), "Scale the BLEU score by (a history of) the input length")
     ("scale-by-reference-length", po::value(&scaleByReferenceLength)->default_value(false), "Scale BLEU by (a history of) the reference length")
-    ("scale-by-target-length", po::value(&scaleByTargetLength)->default_value(false), "Scale BLEU by (a history of) the target length")
+    ("scale-by-target-length-linear", po::value(&scaleByTargetLengthLinear)->default_value(false), "Scale BLEU by (a history of) the target length (linear future estimate)")
+    ("scale-by-target-length-trend", po::value(&scaleByTargetLengthTrend)->default_value(false), "Scale BLEU by (a history of) the target length (trend-based future estimate)")
     ("scale-by-avg-length", po::value(&scaleByAvgLength)->default_value(false), "Scale BLEU by (a history of) the average of input and reference length")
     ("scale-by-x", po::value(&scaleByX)->default_value(1), "Scale the BLEU score by value x")
     ("scale-margin", po::value(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
@@ -174,6 +179,7 @@ int main(int argc, char** argv) {
     ("slack", po::value(&slack)->default_value(0.01), "Use slack in optimiser")
     ("slack-min", po::value(&slack_min)->default_value(0.01), "Minimum slack used")
     ("slack-step", po::value(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
+    ("stabilise-length", po::value(&stabiliseLength)->default_value(false), "Stabilise word penalty when length ratio >= 1")
     ("stop-weights", po::value(&weightConvergence)->default_value(true), "Stop when weights converge")
     ("threads", po::value(&threadcount)->default_value(1), "Number of threads used")
     ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level")
@@ -268,11 +274,7 @@ int main(int argc, char** argv) {
     }
   }
 
-  if (scaleByReferenceLength)
-    scaleByInputLength = false;
-  if (scaleByTargetLength)
-    scaleByInputLength = false;
-  if (scaleByAvgLength)
+  if (scaleByReferenceLength || scaleByTargetLengthLinear || scaleByTargetLengthTrend || scaleByAvgLength)
     scaleByInputLength = false;
 
   // initialise Moses
@@ -285,7 +287,8 @@ int main(int argc, char** argv) {
   vector<string> decoder_params;
   boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
   MosesDecoder* decoder = new MosesDecoder(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
-  decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength, scaleByTargetLength,
+  decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength,
+      scaleByTargetLengthLinear, scaleByTargetLengthTrend,
       scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP);
   if (normaliseWeights) {
     ScoreComponentCollection startWeights = decoder->getWeights();
@@ -409,6 +412,12 @@ int main(int argc, char** argv) {
   ScoreComponentCollection mixedAverageWeightsPrevious;
   ScoreComponentCollection mixedAverageWeightsBeforePrevious;
 
+  // when length ratio >= 1, set this to true
+  bool fixLength = false;
+
+  // for accumulating delayed updates
+  ScoreComponentCollection delayedWeightUpdates;
+
   bool stop = false;
//  int sumStillViolatedConstraints;
   float *sendbuf, *recvbuf;
@@ -427,6 +436,12 @@ int main(int argc, char** argv) {
     // number of weight dumps this epoch
     size_t weightEpochDump = 0;
 
+    // sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch
+    size_t dev_hypothesis_length = 0;
+    size_t dev_reference_length = 0;
+
+    delayedWeightUpdates.ZeroAll();
+
     size_t shardPosition = 0;
     vector<size_t>::const_iterator sid = shard.begin();
     while (sid != shard.end()) {
@@ -460,7 +475,7 @@ int main(int argc, char** argv) {
       for (size_t batchPosition = 0; batchPosition < batchSize && sid != shard.end(); ++batchPosition) {
         string& input = inputSentences[*sid];
-        const vector<string>& refs = referenceSentences[*sid];
+//        const vector<string>& refs = referenceSentences[*sid];
         cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;
 
         vector<ScoreComponentCollection> newFeatureValues;
@@ -474,7 +489,7 @@ int main(int argc, char** argv) {
         featureValuesFear.push_back(newFeatureValues);
         bleuScoresHope.push_back(newBleuScores);
         bleuScoresFear.push_back(newBleuScores);
-        if (historyOf1best) {
+        if (historyOf1best || stabiliseLength) {
           dummyFeatureValues.push_back(newFeatureValues);
           dummyBleuScores.push_back(newBleuScores);
         }
@@ -493,13 +508,16 @@ int main(int argc, char** argv) {
         cerr << ", l-ratio hope: " << hope_length_ratio << endl;
 
         vector<const Word*> bestModel;
-        if (historyOf1best) {
+        if (historyOf1best || stabiliseLength) {
           // MODEL (for updating the history only, using dummy vectors)
-          cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history)" << endl;
+          cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl;
           bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
               dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition],
               true, distinctNbest, rank, epoch);
           decoder->cleanup();
+          cerr << endl;
+          dev_hypothesis_length += bestModel.size();
+          dev_reference_length += reference_length;
         }
 
         // FEAR
@@ -576,6 +594,10 @@ int main(int argc, char** argv) {
           oneBests.push_back(bestModel);
           float model_length_ratio = (float)bestModel.size()/reference_length;
           cerr << ", l-ratio model: " << model_length_ratio << endl;
+          if (stabiliseLength) {
+            dev_hypothesis_length += bestModel.size();
+            dev_reference_length += reference_length;
+          }
 
           // FEAR
           cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl;
@@ -622,6 +644,19 @@ int main(int argc, char** argv) {
           break;
         }
 
+        // set word penalty to 0 before optimising (if 'stabilise-length' is active)
+        if (fixLength) {
+          iter = featureFunctions.begin();
+          for (; iter != featureFunctions.end(); ++iter) {
+            if ((*iter)->GetScoreProducerWeightShortName() == "w") {
+              ignoreWPFeature(featureValues, (*iter));
+              ignoreWPFeature(featureValuesHope, (*iter));
+              ignoreWPFeature(featureValuesFear, (*iter));
+              break;
+            }
+          }
+        }
+
         // take logs of feature values
         if (logFeatureValues) {
           takeLogs(featureValuesHope, baseOfLog);
@@ -654,24 +689,28 @@ int main(int argc, char** argv) {
         // Run optimiser on batch:
         VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
         size_t update_status;
+        ScoreComponentCollection weightUpdate;
         if (perceptron_update) {
           vector<vector<float> > dummy1;
-          update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+          update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
               featureValuesHope, featureValuesFear, dummy1, dummy1, learning_rate, rank, epoch);
         }
         else if (hope_fear) {
-          update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+          update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
               featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, learning_rate, rank, epoch);
         }
         else {
           // model_hope_fear
-          update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights,
+          update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, weightUpdate,
              featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, learning_rate, rank, epoch);
         }
 
//        sumStillViolatedConstraints += update_status;
 
         if (update_status == 0) { // if weights were updated
+          // apply weight update
+          mosesWeights.PlusEquals(weightUpdate);
+
           if (normaliseWeights) {
             mosesWeights.L1Normalise();
           }
@@ -690,8 +729,11 @@ int main(int argc, char** argv) {
             mosesWeights = averageWeights;
           }
 
-          // set new Moses weights
-          decoder->setWeights(mosesWeights);
+          if (delayUpdates)
+            delayedWeightUpdates.PlusEquals(weightUpdate);
+          else
+            // set new Moses weights
+            decoder->setWeights(mosesWeights);
         }
 
         // update history (for approximate document Bleu)
@@ -802,8 +844,25 @@ int main(int argc, char** argv) {
           }
         }
       }// end dumping
+
     } // end of shard loop, end of this epoch
 
+    if (delayUpdates) {
+      // apply all updates from this epoch to the weight vector
+      ScoreComponentCollection mosesWeights = decoder->getWeights();
+      mosesWeights.PlusEquals(delayedWeightUpdates);
+      decoder->setWeights(mosesWeights);
+      cerr << "Rank " << rank << ", epoch " << epoch << ", delayed update, new moses weights: " << mosesWeights << endl;
+    }
+
+    if (stabiliseLength && !fixLength) {
+      float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
+      if (lengthRatio >= 1) {
+        cerr << "Rank " << rank << ", epoch " << epoch << ", length ratio >= 1, fixing word penalty. " << endl;
+        fixLength = 1;
+      }
+    }
+
     if (verbosity > 0) {
       cerr << "Bleu feature history after epoch " << epoch << endl;
       decoder->printBleuFeatureHistory(cerr);
@@ -840,28 +899,19 @@ int main(int argc, char** argv) {
     if (rank == 0 && (epoch >= 2)) {
       ScoreComponentCollection firstDiff(mixedAverageWeights);
       firstDiff.MinusEquals(mixedAverageWeightsPrevious);
-      VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl);
+      VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff <<
+          " (max: " << firstDiff.GetLInfNorm() << ")" << endl);
       ScoreComponentCollection secondDiff(mixedAverageWeights);
       secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
-      VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl);
+      VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff <<
+          " (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl);
 
       // check whether stopping criterion has been reached
       // (both difference vectors must have all weight changes smaller than min_weight_change)
-      FVector changes1 = firstDiff.GetScoresVector();
-      FVector changes2 = secondDiff.GetScoresVector();
-      FVector::const_iterator iterator1 = changes1.cbegin();
-      FVector::const_iterator iterator2 = changes2.cbegin();
-      while (iterator1 != changes1.cend()) {
-        if (abs((*iterator1).second) >= min_weight_change || abs(
-            (*iterator2).second) >= min_weight_change) {
-          reached = false;
-          break;
-        }
-
-        ++iterator1;
-        ++iterator2;
-      }
-
+      if (firstDiff.GetLInfNorm() >= min_weight_change)
+        reached = false;
+      if (secondDiff.GetLInfNorm() >= min_weight_change)
+        reached = false;
       if (reached) {
         // stop MIRA
         stop = true;
@@ -991,16 +1041,20 @@ void printFeatureValues(vector<vector<ScoreComponentCollection> > &featureValues
 }
 
 void ignoreCoreFeatures(vector<vector<ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap) {
-  for (size_t i = 0; i < featureValues.size(); ++i) {
+  for (size_t i = 0; i < featureValues.size(); ++i)
     for (size_t j = 0; j < featureValues[i].size(); ++j) {
      // set all core features to 0
       StrFloatMap::iterator p;
       for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p)
-      {
         featureValues[i][j].Assign(p->first, 0);
-      }
     }
-  }
+}
+
+void ignoreWPFeature(vector<vector<ScoreComponentCollection> > &featureValues, const ScoreProducer* sp) {
+  for (size_t i = 0; i < featureValues.size(); ++i)
+    for (size_t j = 0; j < featureValues[i].size(); ++j)
+      // set WP feature to 0
+      featureValues[i][j].Assign(sp, 0);
 }
 
 void takeLogs(vector<vector<ScoreComponentCollection> > &featureValues, size_t base) {
diff --git a/mira/Main.h b/mira/Main.h
index 68de9b9c9..4fd859b95 100644
--- a/mira/Main.h
+++ b/mira/Main.h
@@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 #include "ScoreComponentCollection.h"
 #include "Word.h"
+#include "ScoreProducer.h"
 
 typedef std::map<std::string, float> StrFloatMap;
 typedef std::pair<std::string, float> StrFloatPair;
@@ -46,6 +47,7 @@ bool loadWeights(const std::string& filename, StrFloatMap& coreWeightMap);
 bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size);
 void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues);
 void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap);
+void ignoreWPFeature(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, const Moses::ScoreProducer* sp);
 void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
 void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);
diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp
index 17f2b6dad..6dd88ba5d 100644
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@@ -7,7 +7,9 @@ using namespace std;
 namespace Mira {
 
-size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeights(
+    ScoreComponentCollection& currWeights,
+    ScoreComponentCollection& weightUpdate,
     const vector<vector<ScoreComponentCollection> >& featureValues,
     const vector<vector<float> >& losses,
     const vector<vector<float> >& bleuScores,
@@ -142,9 +144,7 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
   }
 
   cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
-
-  // apply update to weight vector
-  currWeights.PlusEquals(summedUpdate);
+  weightUpdate.PlusEquals(summedUpdate);
 
   // Sanity check: are there still violated constraints after optimisation?
/*  int violatedConstraintsAfter = 0;
@@ -164,7 +164,9 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
   return 0;
 }
 
-size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeightsHopeFear(
+    Moses::ScoreComponentCollection& currWeights,
+    Moses::ScoreComponentCollection& weightUpdate,
     const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
     const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
     const std::vector<std::vector<float> >& bleuScoresHope,
@@ -299,9 +301,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& cur
   }
 
   cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
-
-  // apply update to weight vector
-  currWeights.PlusEquals(summedUpdate);
+  weightUpdate.PlusEquals(summedUpdate);
 
   // Sanity check: are there still violated constraints after optimisation?
/*  int violatedConstraintsAfter = 0;
diff --git a/mira/Optimiser.h b/mira/Optimiser.h
index 5827f1f5e..709c876e3 100644
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@@ -30,7 +30,9 @@ namespace Mira {
   public:
     Optimiser() {}
 
-    virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+    virtual size_t updateWeightsHopeFear(
+        Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
         const std::vector<std::vector<float> >& bleuScoresHope,
@@ -42,7 +44,9 @@ namespace Mira {
 
   class Perceptron : public Optimiser {
   public:
-    virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+    virtual size_t updateWeightsHopeFear(
+        Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
         const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
         const std::vector<std::vector<float> >& bleuScoresHope,
@@ -66,6 +70,7 @@ namespace Mira {
       m_margin_slack(margin_slack) { }
 
     size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
        const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
        const std::vector<std::vector<float> >& losses,
        const std::vector<std::vector<float> >& bleuScores,
@@ -75,6 +80,7 @@ namespace Mira {
        size_t rank, size_t epoch);
 
     virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+        Moses::ScoreComponentCollection& weightUpdate,
        const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
        const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
        const std::vector<std::vector<float> >& bleuScoresHope,
diff --git a/mira/Perceptron.cpp b/mira/Perceptron.cpp
index 322a0984e..a2bd7cde3 100644
--- a/mira/Perceptron.cpp
+++ b/mira/Perceptron.cpp
@@ -24,7 +24,9 @@ using namespace std;
 
 namespace Mira {
 
-size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
+size_t Perceptron::updateWeightsHopeFear(
+    ScoreComponentCollection& currWeights,
+    ScoreComponentCollection& weightUpdate,
     const vector< vector<ScoreComponentCollection> >& featureValuesHope,
     const vector< vector<ScoreComponentCollection> >& featureValuesFear,
     const vector< vector<float> >& dummy1,
@@ -39,7 +41,7 @@ size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
   featureValueDiff.MinusEquals(featureValuesFear[0][0]);
   cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
   featureValueDiff.MultiplyEquals(perceptron_learning_rate);
-  currWeights.PlusEquals(featureValueDiff);
+  weightUpdate.PlusEquals(featureValueDiff);
   cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
   return 0;
 }
diff --git a/mira/training-expt.perl b/mira/training-expt.perl
index afe8b09e7..df7df44a9 100755
--- a/mira/training-expt.perl
+++ b/mira/training-expt.perl
@@ -72,6 +72,9 @@ my $moses_ini_file = &param_required("train.moses-ini-file");
 my $input_file = &param_required("train.input-file");
 &check_exists ("train input file", $input_file);
 my $reference_files = &param_required("train.reference-files");
+for my $ref (glob $reference_files . "*") {
+    &check_exists ("ref files", $ref);
+}
 my $trainer_exe = &param_required("train.trainer");
 &check_exists("Training executable", $trainer_exe);
#my $weights_file = &param_required("train.weights-file");
@@ -94,20 +97,21 @@ my $burn_in_reference_files = &param("train.burn-in-reference-files");
 my $skipTrain = &param("train.skip", 0);
 
#devtest configuration
-my ($devtest_input_file, $devtest_reference_file,$devtest_ini_file,$bleu_script,$use_moses);
+my ($devtest_input_file, $devtest_reference_files,$devtest_ini_file,$bleu_script,$use_moses);
 my $test_exe = &param("devtest.moses");
 &check_exists("test executable", $test_exe);
 $bleu_script = &param_required("devtest.bleu");
 &check_exists("multi-bleu script", $bleu_script);
 $devtest_input_file = &param_required("devtest.input-file");
-$devtest_reference_file = &param_required("devtest.reference-file");
 &check_exists ("devtest input file", $devtest_input_file);
-
-for my $ref (glob $devtest_reference_file . "*") {
+$devtest_reference_files = &param_required("devtest.reference-file");
+for my $ref (glob $devtest_reference_files . "*") {
     &check_exists ("devtest ref file", $ref);
 }
 $devtest_ini_file = &param_required("devtest.moses-ini-file");
 &check_exists ("devtest ini file", $devtest_ini_file);
+
+
 my $weight_file_stem = "$name-weights";
 my $extra_memory_devtest = &param("devtest.extra-memory",0);
 my $skip_devtest = &param("devtest.skip-devtest",0);
@@ -174,8 +178,9 @@ my @refs;
 if (ref($reference_files) eq 'ARRAY') {
     @refs = @$reference_files;
 } else {
-    @refs = glob $reference_files;
+    @refs = glob $reference_files . "*"
 }
+my $arr_refs = \@refs;
 
 if (!$skipTrain) {
     #write the script
@@ -198,7 +203,6 @@ print TRAIN "-f $moses_ini_file \\\n";
 print TRAIN "-i $input_file \\\n";
 for my $ref (@refs) {
-    &check_exists("train ref file", $ref);
     print TRAIN "-r $ref ";
 }
 print TRAIN "\\\n";
@@ -206,15 +210,15 @@ print TRAIN "\\\n";
 if ($burn_in) {
     print TRAIN "--burn-in 1 \\\n";
     print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
-    my @refs;
+    my @burnin_refs;
     if (ref($burn_in_reference_files) eq 'ARRAY') {
-        @refs = @$burn_in_reference_files;
+        @burnin_refs = @$burn_in_reference_files;
     } else {
-        @refs = glob $burn_in_reference_files;
+        @burnin_refs = glob $burn_in_reference_files . "*";
     }
-    for my $ref (@refs) {
-        &check_exists("burn-in ref file", $ref);
-        print TRAIN "--burn-in-reference-files $ref ";
+    for my $burnin_ref (@burnin_refs) {
+        &check_exists("burn-in ref file", $burnin_ref);
+        print TRAIN "--burn-in-reference-files $burnin_ref ";
     }
     print TRAIN "\\\n";
 }
@@ -317,10 +321,10 @@ while(1) {
     my $suffix = "";
     print "weight file exists? ".(-e $new_weight_file)."\n";
     if (!$skip_devtest) {
-        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_file, $skip_submit_test);
+        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test);
     }
     if (!$skip_dev) {
-        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $refs[0], $skip_submit_test);
+        createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $reference_files, $skip_submit_test);
     }
 }
diff --git a/moses-chart-cmd/src/IOWrapper.cpp b/moses-chart-cmd/src/IOWrapper.cpp
index cf90b877b..ce31bc192 100644
--- a/moses-chart-cmd/src/IOWrapper.cpp
+++ b/moses-chart-cmd/src/IOWrapper.cpp
@@ -47,6 +47,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "ChartHypothesis.h"
 #include "DotChart.h"
 
+#include <boost/algorithm/string.hpp>
+#include "FeatureVector.h"
+
 using namespace std;
 using namespace Moses;
@@ -345,7 +348,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
     // print the surface factor of the translation
     out << translationId << " ||| ";
     OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
-    out << " |||";
+    out << " ||| ";
 
     // print the scores in a hardwired order
     // before each model type, the corresponding command-line-like name must be emitted
@@ -362,26 +365,23 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
       }
     }
 
-
    std::string lastName = "";

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
-	for( size_t i=0; i<pds.size(); i++ ) {
-	  size_t pd_numinputscore = pds[i]->GetNumInputScores();
-	  vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
-	  for (size_t j = 0; j<scores.size(); ++j){
-	    if (labeledOutput && (i == 0) ){
-	      if ((j == 0) || (j == pd_numinputscore)){
-	        lastName = pds[i]->GetScoreProducerWeightShortName(j);
-	        out << " " << lastName << ":";
-	      }
-	    }
-	    out << " " << scores[j];
-	  }
+      for( size_t i=0; i<pds.size(); i++ ) {
+        size_t pd_numinputscore = pds[i]->GetNumInputScores();
+        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
+        for (size_t j = 0; j<scores.size(); ++j){
+          if (labeledOutput && (i == 0) ){
+            if ((j == 0) || (j == pd_numinputscore)){
+              lastName = pds[i]->GetScoreProducerWeightShortName(j);
+              out << " " << lastName << ":";
+            }
+          }
+          out << " " << scores[j];
+        }
      }
    }
@@ -393,26 +393,36 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
 
    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
-	for( size_t i=0; i<gds.size(); i++ ) {
-	  size_t pd_numinputscore = gds[i]->GetNumInputScores();
-	  vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
-	  for (size_t j = 0; j<scores.size(); ++j){
-	    if (labeledOutput && (i == 0) ){
-	      if ((j == 0) || (j == pd_numinputscore)){
-	        lastName = gds[i]->GetScoreProducerWeightShortName(j);
-	        out << " " << lastName << ":";
-	      }
-	    }
-	    out << " " << scores[j];
-	  }
+      for( size_t i=0; i<gds.size(); i++ ) {
+        size_t pd_numinputscore = gds[i]->GetNumInputScores();
+        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
+        for (size_t j = 0; j<scores.size(); ++j){
+          if (labeledOutput && (i == 0) ){
+            if ((j == 0) || (j == pd_numinputscore)){
+              lastName = gds[i]->GetScoreProducerWeightShortName(j);
+              out << " " << lastName << ":";
+            }
+          }
+          out << " " << scores[j];
+        }
      }
    }
 
+    // output sparse features
+    lastName = "";
+    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
+    for( size_t i=0; i<sff.size(); i++ )
+      if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
+        OutputSparseFeatureScores( out, path, sff[i], lastName );
+
+    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
+    for( size_t i=0; i<slf.size(); i++ )
+      if (slf[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
+        OutputSparseFeatureScores( out, path, slf[i], lastName );
+
    // total
-    out << " |||" << path.GetTotalScore();
+    out << " ||| " << path.GetTotalScore();
 
/*
    if (includeAlignment) {
@@ -443,6 +453,32 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
  m_nBestOutputCollector->Write(translationId, out.str());
 }
 
+void IOWrapper::OutputSparseFeatureScores( std::ostream& out, const ChartTrellisPath &path, const FeatureFunction *ff, std::string &lastName )
+{
+  const StaticData &staticData = StaticData::Instance();
+  bool labeledOutput = staticData.IsLabeledNBestList();
+  const FVector scores = path.GetScoreBreakdown().GetVectorForProducer( ff );
+
+  // report weighted aggregate
+  if (! ff->GetSparseFeatureReporting()) {
+    const FVector &weights = staticData.GetAllWeights().GetScoresVector();
+    if (labeledOutput && !boost::contains(ff->GetScoreProducerDescription(), ":"))
+      out << " " << ff->GetScoreProducerWeightShortName() << ":";
+    out << " " << scores.inner_product(weights);
+  }
+
+  // report each feature
+  else {
+    for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
+      if (i->second != 0) { // do not report zero-valued features
+        if (labeledOutput)
+          out << " " << i->first << ":";
+        out << " " << i->second;
+      }
+    }
+  }
+}
+
 void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
 {
  stream.setf(std::ios::fixed);
diff --git a/moses-chart-cmd/src/IOWrapper.h b/moses-chart-cmd/src/IOWrapper.h
index 5936e7405..058ee0712 100644
--- a/moses-chart-cmd/src/IOWrapper.h
+++ b/moses-chart-cmd/src/IOWrapper.h
@@ -44,6 +44,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "OutputCollector.h" #include "ChartHypothesis.h" +#include "ChartTrellisPath.h" + namespace Moses { class FactorCollection; @@ -82,6 +84,7 @@ public: void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors); void OutputBestHypo(const std::vector& mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors); void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId); + void OutputSparseFeatureScores(std::ostream& out, const Moses::ChartTrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName); void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, long translationId); void Backtrack(const Moses::ChartHypothesis *hypo); diff --git a/moses-chart-cmd/src/Main.cpp b/moses-chart-cmd/src/Main.cpp index 2c9002720..bec974ee8 100644 --- a/moses-chart-cmd/src/Main.cpp +++ b/moses-chart-cmd/src/Main.cpp @@ -165,18 +165,25 @@ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source } static void PrintFeatureWeight(const FeatureFunction* ff) { - size_t numScoreComps = ff->GetNumScoreComponents(); if (numScoreComps != ScoreProducer::unlimited) { vector values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); - for (size_t i = 0; i < numScoreComps; ++i) { + for (size_t i = 0; i < numScoreComps; ++i) cout << ff->GetScoreProducerDescription() << " " << ff->GetScoreProducerWeightShortName() << " " << values[i] << endl; - } - } else { - cout << ff->GetScoreProducerDescription() << " " << - ff->GetScoreProducerWeightShortName() << " sparse" << endl; + } +} + +static void PrintSparseFeatureWeight(const FeatureFunction* ff) +{ + if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) { + if (ff->GetSparseProducerWeight() == 1) + cout << ff->GetScoreProducerDescription() << " " << + ff->GetScoreProducerWeightShortName() << " sparse" << endl; + else + cout << ff->GetScoreProducerDescription() << " " << + ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl; } } @@ -201,6 +208,9 @@ static void ShowWeights() for (size_t i = 0; i < slf.size(); ++i) { PrintFeatureWeight(slf[i]); } + for (size_t i = 0; i < sff.size(); ++i) { + PrintSparseFeatureWeight(sff[i]); + } } diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp index 0eccac246..feb092dab 100644 --- a/moses-cmd/src/Main.cpp +++ b/moses-cmd/src/Main.cpp @@ -287,21 +287,27 @@ private: static void PrintFeatureWeight(const FeatureFunction* ff) { - size_t numScoreComps = ff->GetNumScoreComponents(); if (numScoreComps != ScoreProducer::unlimited) { vector values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); - for (size_t i = 0; i < numScoreComps; ++i) { + for (size_t i = 0; i < numScoreComps; ++i) cout << ff->GetScoreProducerDescription() << " " << ff->GetScoreProducerWeightShortName() << " " << values[i] << endl; - } - } else { - cout << ff->GetScoreProducerDescription() << " " << - ff->GetScoreProducerWeightShortName() << " sparse" << endl; } } +static void PrintSparseFeatureWeight(const FeatureFunction* ff) +{ + if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) { + if (ff->GetSparseProducerWeight() == 1) + cout << ff->GetScoreProducerDescription() << " " << + ff->GetScoreProducerWeightShortName() << " sparse" << endl; + else + cout << ff->GetScoreProducerDescription() << " " << + 
ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl; + } +} static void ShowWeights() { @@ -324,6 +330,9 @@ static void ShowWeights() for (size_t i = 0; i < gds.size(); ++i) { PrintFeatureWeight(gds[i]); } + for (size_t i = 0; i < sff.size(); ++i) { + PrintSparseFeatureWeight(sff[i]); + } } /** main function of the command line version of the decoder **/ diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp index 142fa27c4..4ab9f92a9 100644 --- a/moses/src/BleuScoreFeature.cpp +++ b/moses/src/BleuScoreFeature.cpp @@ -81,11 +81,13 @@ void BleuScoreFeature::PrintHistory(std::ostream& out) const { } } -void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength, - float scaleByX, float historySmoothing, size_t scheme, float relaxBP) { +void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, + bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend, + float scaleByX, float historySmoothing, size_t scheme, float relaxBP) { m_scale_by_input_length = scaleByInputLength; m_scale_by_ref_length = scaleByRefLength; - m_scale_by_target_length = scaleByTargetLength; + m_scale_by_target_length_linear = scaleByTargetLengthLinear; + m_scale_by_target_length_trend = scaleByTargetLengthTrend; m_scale_by_avg_length = scaleByAvgLength; m_scale_by_x = scaleByX; m_historySmoothing = historySmoothing; @@ -97,6 +99,7 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin { m_refs.clear(); FactorCollection& fc = FactorCollection::Instance(); + cerr << "Number of reference files: " << refs.size() << endl; for (size_t file_id = 0; file_id < refs.size(); file_id++) { for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) { const string& ref = refs[file_id][ref_id]; @@ -430,13 +433,19 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const { else if (m_scale_by_ref_length) { precision *= m_ref_length_history + m_cur_ref_length; } - else if (m_scale_by_target_length) { - precision *= m_target_length_history + state->m_target_length; + else if (m_scale_by_target_length_linear) { + // length of current hypothesis + number of words still to translate from source (rest being translated 1-to-1) + float scaled_target_length = state->m_target_length + (m_cur_source_length - state->m_source_length); + precision *= m_target_length_history + scaled_target_length; + } + else if (m_scale_by_target_length_trend) { + // length of full target if remaining words were translated with the same fertility as so far + float scaled_target_length = ((float)m_cur_source_length/state->m_source_length) * state->m_target_length; + precision *= m_target_length_history + scaled_target_length; } else if (m_scale_by_avg_length) { precision *= (m_source_length_history + m_ref_length_history + m_cur_source_length + + m_cur_ref_length) / 2; } - return precision*m_scale_by_x; } diff --git a/moses/src/BleuScoreFeature.h b/moses/src/BleuScoreFeature.h index ac5d113fd..3ff76c465 100644 --- a/moses/src/BleuScoreFeature.h +++ b/moses/src/BleuScoreFeature.h @@ -80,7 +80,8 @@ public: void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); void PrintReferenceLength(const std::vector& ref_ids); size_t GetReferenceLength(size_t ref_id); - void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool 
scaleByAvgLength, bool scaleByTargetLength, + void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, + bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend, float scaleByX, float historySmoothing, size_t scheme, float relaxBP); void GetNgramMatchCounts(Phrase&, const NGrams&, @@ -125,8 +126,11 @@ private: // scale BLEU score by (history of) reference length bool m_scale_by_ref_length; - // scale BLEU score by (history of) target length - bool m_scale_by_target_length; + // scale BLEU score by (history of) target length (linear future estimate) + bool m_scale_by_target_length_linear; + + // scale BLEU score by (history of) target length (trend-based future estimate) + bool m_scale_by_target_length_trend; // scale BLEU score by (history of) the average of input and reference length bool m_scale_by_avg_length; diff --git a/moses/src/FeatureVector.cpp b/moses/src/FeatureVector.cpp index 05da851b0..ce4754bb5 100644 --- a/moses/src/FeatureVector.cpp +++ b/moses/src/FeatureVector.cpp @@ -255,17 +255,10 @@ namespace Moses { } FVector& FVector::operator+= (const FVector& rhs) { - if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) { + if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) resize(rhs.m_coreFeatures.size()); - } - for (iterator i = begin(); i != end(); ++i) { - set(i->first,i->second + rhs.get(i->first)); - } - for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) { - if (!hasNonDefaultValue(i->first)) { - set(i->first,i->second); - } - } + for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) + set(i->first, get(i->first) + i->second); for (size_t i = 0; i < m_coreFeatures.size(); ++i) { if (i < rhs.m_coreFeatures.size()) { m_coreFeatures[i] += rhs.m_coreFeatures[i]; @@ -275,17 +268,10 @@ namespace Moses { } FVector& FVector::operator-= (const FVector& rhs) { - if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) { + if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) resize(rhs.m_coreFeatures.size()); - } - for (iterator i = begin(); i != end(); ++i) { - set(i->first,i->second - rhs.get(i->first)); - } - for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) { - if (!hasNonDefaultValue(i->first)) { - set(i->first,-(i->second)); - } - } + for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) + set(i->first, get(i->first) -(i->second)); for (size_t i = 0; i < m_coreFeatures.size(); ++i) { if (i < rhs.m_coreFeatures.size()) { m_coreFeatures[i] -= rhs.m_coreFeatures[i]; @@ -336,28 +322,6 @@ namespace Moses { return *this; } - FVector& FVector::max_equals(const FVector& rhs) { - if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) { - resize(rhs.m_coreFeatures.size()); - } - for (iterator i = begin(); i != end(); ++i) { - set(i->first, max(i->second , rhs.get(i->first) )); - } - for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) { - if (!hasNonDefaultValue(i->first)) { - set(i->first, i->second); - } - } - for (size_t i = 0; i < m_coreFeatures.size(); ++i) { - if (i < rhs.m_coreFeatures.size()) { - m_coreFeatures[i] = max(m_coreFeatures[i], rhs.m_coreFeatures[i]); - } else { - m_coreFeatures[i] = max(m_coreFeatures[i],(float)0); - } - } - return *this; - } - FVector& FVector::operator*= (const FValue& rhs) { //NB Could do this with boost::bind ? 
     for (iterator i = begin(); i != end(); ++i) {
@@ -367,7 +331,6 @@ namespace Moses {
     return *this;
   }
 
-
   FVector& FVector::operator/= (const FValue& rhs) {
     for (iterator i = begin(); i != end(); ++i) {
       i->second /= rhs;
@@ -387,6 +350,25 @@ namespace Moses {
     return norm;
   }
 
+  FValue FVector::l2norm() const {
+    return sqrt(inner_product(*this));
+  }
+
+  FValue FVector::linfnorm() const {
+    FValue norm = 0;
+    for (const_iterator i = cbegin(); i != cend(); ++i) {
+      float absValue = abs(i->second);
+      if (absValue > norm)
+        norm = absValue;
+    }
+    for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
+      float absValue = m_coreFeatures[i];
+      if (absValue > norm)
+        norm = absValue;
+    }
+    return norm;
+  }
+
   FValue FVector::sum() const {
     FValue sum = 0;
     for (const_iterator i = cbegin(); i != cend(); ++i) {
@@ -395,11 +377,7 @@ namespace Moses {
     sum += m_coreFeatures.sum();
     return sum;
   }
-
-  FValue FVector::l2norm() const {
-    return sqrt(inner_product(*this));
-  }
-
+  
   FValue FVector::inner_product(const FVector& rhs) const {
     CHECK(m_coreFeatures.size() == rhs.m_coreFeatures.size());
     FValue product = 0.0;
@@ -436,11 +414,7 @@ namespace Moses {
   const FVector operator/(const FVector& lhs, const FValue& rhs) {
     return FVector(lhs) /= rhs;
   }
-
-  const FVector fvmax(const FVector& lhs, const FVector& rhs) {
-    return FVector(lhs).max_equals(rhs);
-  }
-
+  
   FValue inner_product(const FVector& lhs, const FVector& rhs) {
     if (lhs.size() >= rhs.size()) {
       return rhs.inner_product(lhs);
diff --git a/moses/src/FeatureVector.h b/moses/src/FeatureVector.h
index db2e9202f..f2d187ba6 100644
--- a/moses/src/FeatureVector.h
+++ b/moses/src/FeatureVector.h
@@ -177,6 +177,7 @@ namespace Moses {
     /** norms and sums */
     FValue l1norm() const;
     FValue l2norm() const;
+    FValue linfnorm() const;
     FValue sum() const;
 
     /** pretty printing */
@@ -292,6 +293,10 @@ namespace Moses {
       return (m_fv->m_features[m_name] += lhs);
     }
 
+    FValue operator -=(FValue lhs) {
+      return (m_fv->m_features[m_name] -= lhs);
+    }
+
   private:
     FValue m_tmp;
diff --git a/moses/src/FeatureVectorTest.cpp b/moses/src/FeatureVectorTest.cpp
index f6520ead8..af1829e62 100644
--- a/moses/src/FeatureVectorTest.cpp
+++ b/moses/src/FeatureVectorTest.cpp
@@ -224,26 +224,6 @@ BOOST_AUTO_TEST_CASE(core_scalar)
 
 }
 
-BOOST_AUTO_TEST_CASE(core_max)
-{
-  FVector f1(2);
-  FVector f2(2);
-  FName n1("a");
-  FName n2("b");
-  FName n3("c");
-  f1[0] = 1.1; f1[1] = -0.1; ; f1[n2] = -1.5; f1[n3] = 2.2;
-  f2[0] = 0.5; f2[1] = 0.25; f2[n1] = 1; f2[n3] = 2.4;
-
-  FVector m = fvmax(f1,f2);
-
-  BOOST_CHECK_CLOSE((FValue)m[0], 1.1 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[1], 0.25 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n1], 1 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n2],0 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n3],2.4 , TOL);
-
-}
-
 BOOST_AUTO_TEST_CASE(l1norm)
 {
   FVector f1(3);
diff --git a/moses/src/ScoreComponentCollection.cpp b/moses/src/ScoreComponentCollection.cpp
index b7e54c554..d9cb40e37 100644
--- a/moses/src/ScoreComponentCollection.cpp
+++ b/moses/src/ScoreComponentCollection.cpp
@@ -63,8 +63,8 @@ void ScoreComponentCollection::MultiplyEquals(float scalar)
// Multiply all weights of this sparse producer by a given scalar
 void ScoreComponentCollection::MultiplyEquals(const ScoreProducer* sp, float scalar)
 {
-  CHECK(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
-  std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
+  assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
+  std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
   for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
     std::stringstream name;
     name << i->first;
@@ -100,6 +100,10 @@ float ScoreComponentCollection::GetL2Norm() const {
   return m_scores.l2norm();
 }
 
+float ScoreComponentCollection::GetLInfNorm() const {
+  return m_scores.linfnorm();
+}
+
 void ScoreComponentCollection::Save(ostream& out) const {
   ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
   for (; iter != s_scoreIndexes.end(); ++iter ) {
diff --git a/moses/src/ScoreComponentCollection.h b/moses/src/ScoreComponentCollection.h
index dc0913eb6..133e2840d 100644
--- a/moses/src/ScoreComponentCollection.h
+++ b/moses/src/ScoreComponentCollection.h
@@ -150,6 +150,21 @@ public:
     m_scores -= rhs.m_scores;
   }
 
+  //For features which have an unbounded number of components
+  void MinusEquals(const ScoreProducer*sp, const std::string& name, float score)
+  {
+    assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
+    FName fname(sp->GetScoreProducerDescription(),name);
+    m_scores[fname] -= score;
+  }
+
+  //For features which have an unbounded number of components
+  void SparseMinusEquals(const std::string& full_name, float score)
+  {
+    FName fname(full_name);
+    m_scores[fname] -= score;
+  }
+
   //! Add scores from a single ScoreProducer only
   //! The length of scores must be equal to the number of score components
@@ -192,6 +207,13 @@ public:
     m_scores[fname] += score;
   }
 
+  //For features which have an unbounded number of components
+  void SparsePlusEquals(const std::string& full_name, float score)
+  {
+    FName fname(full_name);
+    m_scores[fname] += score;
+  }
+
   void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
   {
     IndexPair indexes = GetIndexes(sp);
@@ -307,6 +329,7 @@ public:
   void L1Normalise();
   float GetL1Norm() const;
   float GetL2Norm() const;
+  float GetLInfNorm() const;
   void Save(const std::string& filename) const;
   void Save(std::ostream&) const;
diff --git a/moses/src/ScoreProducer.h b/moses/src/ScoreProducer.h
index 113a37770..65b655972 100644
--- a/moses/src/ScoreProducer.h
+++ b/moses/src/ScoreProducer.h
@@ -54,6 +54,8 @@ public:
   void SetSparseFeatureReporting() { m_reportSparseFeatures = true; }
   bool GetSparseFeatureReporting() const { return m_reportSparseFeatures; }
+
+  virtual float GetSparseProducerWeight() const { return 1; }
 };
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index d24555f77..406ba6c43 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -1442,7 +1442,7 @@ bool StaticData::LoadReferences()
     }
     string line;
     while (getline(in,line)) {
-      references.back().push_back(line);
+      references[i].push_back(line);
     }
     if (i > 0) {
       if (references[i].size() != references[i-1].size()) {
@@ -1459,14 +1459,12 @@ bool StaticData::LoadReferences()
 
 bool StaticData::LoadDiscrimLMFeature()
 {
-  cerr << "Loading discriminative language models.. ";
-
-  // only load if specified
+  // only load if specified
   const vector<string> &wordFile = m_parameter->GetParam("discrim-lmodel-file");
   if (wordFile.empty()) {
     return true;
   }
-  cerr << wordFile.size() << " models" << endl;
+  cerr << "Loading " << wordFile.size() << " discriminative language model(s).." << endl;
 
   // if this weight is specified, the sparse DLM weights will be scaled with an additional weight
   vector<string> dlmWeightStr = m_parameter->GetParam("weight-dlm");
@@ -1495,6 +1493,11 @@ bool StaticData::LoadDiscrimLMFeature()
       }
     }
     else {
+      if (m_searchAlgorithm == ChartDecoding && !include_lower_ngrams) {
+        UserMessage::Add("Excluding lower order DLM ngrams is currently not supported for chart decoding.");
+        return false;
+      }
+
       m_targetNgramFeatures.push_back(new TargetNgramFeature(factorId, order, include_lower_ngrams));
       if (i < dlmWeights.size())
         m_targetNgramFeatures[i]->SetSparseProducerWeight(dlmWeights[i]);
diff --git a/moses/src/TargetNgramFeature.cpp b/moses/src/TargetNgramFeature.cpp
index 9da9ba670..3fefdfba2 100644
--- a/moses/src/TargetNgramFeature.cpp
+++ b/moses/src/TargetNgramFeature.cpp
@@ -3,6 +3,7 @@
 #include "TargetPhrase.h"
 #include "Hypothesis.h"
 #include "ScoreComponentCollection.h"
+#include "ChartHypothesis.h"
 
 namespace Moses {
 
@@ -12,25 +13,25 @@ int TargetNgramState::Compare(const FFState& other) const {
   const TargetNgramState& rhs = dynamic_cast<const TargetNgramState&>(other);
   int result;
   if (m_words.size() == rhs.m_words.size()) {
-	for (size_t i = 0; i < m_words.size(); ++i) {
-	  result = Word::Compare(m_words[i],rhs.m_words[i]);
-	  if (result != 0) return result;
-	}
+    for (size_t i = 0; i < m_words.size(); ++i) {
+      result = Word::Compare(m_words[i],rhs.m_words[i]);
+      if (result != 0) return result;
+    }
     return 0;
   }
   else if (m_words.size() < rhs.m_words.size()) {
-	for (size_t i = 0; i < m_words.size(); ++i) {
-	  result = Word::Compare(m_words[i],rhs.m_words[i]);
-	  if (result != 0) return result;
-	}
-	return -1;
+    for (size_t i = 0; i < m_words.size(); ++i) {
+      result = Word::Compare(m_words[i],rhs.m_words[i]);
+      if (result != 0) return result;
+    }
+    return -1;
   }
   else {
-	for (size_t i = 0; i < rhs.m_words.size(); ++i) {
-	  result = Word::Compare(m_words[i],rhs.m_words[i]);
-	  if (result != 0) return result;
-	}
-	return 1;
+    for (size_t i = 0; i < rhs.m_words.size(); ++i) {
+      result = Word::Compare(m_words[i],rhs.m_words[i]);
+      if (result != 0) return result;
+    }
+    return 1;
   }
 }
@@ -45,7 +46,7 @@ bool TargetNgramFeature::Load(const std::string &filePath)
   std::string line;
   m_vocab.insert(BOS_);
-  m_vocab.insert(BOS_);
+  m_vocab.insert(EOS_);
   while (getline(inFile, line)) {
     m_vocab.insert(line);
   }
@@ -54,10 +55,9 @@ bool TargetNgramFeature::Load(const std::string &filePath)
   return true;
 }
 
-
 string TargetNgramFeature::GetScoreProducerWeightShortName(unsigned) const
 {
-  return "dlmn";
+  return "dlm";
 }
 
 size_t TargetNgramFeature::GetNumInputScores() const
@@ -65,7 +65,6 @@ size_t TargetNgramFeature::GetNumInputScores() const
   return 0;
 }
 
-
 const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input*/) const
 {
   vector<Word> bos(1,m_bos);
@@ -76,8 +75,8 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
 {
-  const TargetNgramState* tnState = dynamic_cast<const TargetNgramState*>(prev_state);
-  CHECK(tnState);
+  const TargetNgramState* tnState = static_cast<const TargetNgramState*>(prev_state);
+  assert(tnState);
 
   // current hypothesis target phrase
   const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
@@ -85,7 +84,7 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
 
   // extract all ngrams from current hypothesis
   vector<Word> prev_words = tnState->GetWords();
-  string curr_ngram;
+  stringstream curr_ngram;
   bool skip = false;
 
   // include lower order ngrams?
@@ -94,7 +93,9 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
   for (size_t n = m_n; n >= smallest_n; --n) { // iterate over ngram size
     for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
-      const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
+//      const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
+      const string& curr_w = targetPhrase.GetWord(i).GetString(m_factorType);
+
       if (m_vocab.size() && (m_vocab.find(curr_w) == m_vocab.end())) continue; // skip ngrams
 
       if (n > 1) {
@@ -129,23 +130,23 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
       }
 
       if (!skip) {
-        curr_ngram.append(curr_w);
-        accumulator->PlusEquals(this,curr_ngram,1);
+        curr_ngram << curr_w;
+        accumulator->PlusEquals(this,curr_ngram.str(),1);
       }
-      curr_ngram.clear();
+      curr_ngram.str("");
     }
   }
 
   if (cur_hypo.GetWordsBitmap().IsComplete()) {
     for (size_t n = m_n; n >= smallest_n; --n) {
-      string last_ngram;
+      stringstream last_ngram;
       skip = false;
       for (size_t i = cur_hypo.GetSize() - n + 1; i < cur_hypo.GetSize() && !skip; ++i)
         appendNgram(cur_hypo.GetWord(i), skip, last_ngram);
 
       if (n > 1 && !skip) {
-        last_ngram.append(EOS_);
-        accumulator->PlusEquals(this,last_ngram,1);
+        last_ngram << EOS_;
+        accumulator->PlusEquals(this, last_ngram.str(), 1);
       }
     }
     return NULL;
@@ -169,13 +170,267 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
   return new TargetNgramState(new_prev_words);
 }
 
-void TargetNgramFeature::appendNgram(const Word& word, bool& skip, string& ngram) const {
-  const string& w = word.GetFactor(m_factorType)->GetString();
+void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream &ngram) const {
+//  const string& w = word.GetFactor(m_factorType)->GetString();
+  const string& w = word.GetString(m_factorType);
   if (m_vocab.size() && (m_vocab.find(w) == m_vocab.end())) skip = true;
   else {
-    ngram.append(w);
-    ngram.append(":");
+    ngram << w;
+    ngram << ":";
   }
 }
+
+FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
+{
+  vector<const Word*> contextFactor;
+  contextFactor.reserve(m_n);
+
+  // get index map for underlying hypotheses
+  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+    cur_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+
+  // loop over rule
+  bool makePrefix = false;
+  bool makeSuffix = false;
+  bool collectForPrefix = true;
+  size_t prefixTerminals = 0;
+  size_t suffixTerminals = 0;
+  bool onlyTerminals = true;
+  bool prev_is_NT = false;
+  size_t prev_subPhraseLength = 0;
+  for (size_t phrasePos = 0; phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize(); phrasePos++)
+  {
+    // consult rule for either word or non-terminal
+    const Word &word = cur_hypo.GetCurrTargetPhrase().GetWord(phrasePos);
+//    cerr << "word: " << word << endl;
+
+    // regular word
+    if (!word.IsNonTerminal()) {
+      contextFactor.push_back(&word);
+      prev_is_NT = false;
+
+      if (phrasePos==0)
+        makePrefix = true;
+      if (phrasePos==cur_hypo.GetCurrTargetPhrase().GetSize()-1 || prev_is_NT)
+        makeSuffix = true;
+
+      // beginning/end of sentence symbol <s>, </s>?
+      string factorZero = word.GetString(0);
+      if (factorZero.compare("<s>") == 0)
+        prefixTerminals++;
+      // end of sentence symbol </s>?
+ else if (factorZero.compare("") == 0) + suffixTerminals++; + // everything else + else { + stringstream ngram; + ngram << m_baseName; + if (m_factorType == 0) + ngram << factorZero; + else + ngram << word.GetString(m_factorType); + accumulator->SparsePlusEquals(ngram.str(), 1); + + if (collectForPrefix) + prefixTerminals++; + else + suffixTerminals++; + } + } + + // non-terminal, add phrase from underlying hypothesis + else if (m_n > 1) + { + // look up underlying hypothesis + size_t nonTermIndex = nonTermIndexMap[phrasePos]; + const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermIndex); + + const TargetNgramChartState* prevState = + static_cast(prevHypo->GetFFState(featureId)); + size_t subPhraseLength = prevState->GetNumTargetTerminals(); + + // special case: rule starts with non-terminal + if (phrasePos == 0) { + if (subPhraseLength == 1) { + makePrefix = true; + ++prefixTerminals; + + const Word &word = prevState->GetSuffix().GetWord(0); +// cerr << "NT0 --> : " << word << endl; + contextFactor.push_back(&word); + } + else { + onlyTerminals = false; + collectForPrefix = false; + int suffixPos = prevState->GetSuffix().GetSize() - (m_n-1); + if (suffixPos < 0) suffixPos = 0; // push all words if less than order + for(;(size_t)suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) + { + const Word &word = prevState->GetSuffix().GetWord(suffixPos); +// cerr << "NT0 --> : " << word << endl; + contextFactor.push_back(&word); + } + } + } + + // internal non-terminal + else + { + // push its prefix + for(size_t prefixPos = 0; prefixPos < m_n-1 + && prefixPos < subPhraseLength; prefixPos++) + { + const Word &word = prevState->GetPrefix().GetWord(prefixPos); +// cerr << "NT --> " << word << endl; + contextFactor.push_back(&word); + } + + if (subPhraseLength==1) { + if (collectForPrefix) + ++prefixTerminals; + else + ++suffixTerminals; + + if (phrasePos == cur_hypo.GetCurrTargetPhrase().GetSize()-1) + makeSuffix = true; + } + else { + onlyTerminals = false; + collectForPrefix = true; + + // check if something follows this NT + bool wordFollowing = (phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize() - 1)? true : false; + + // check if we are dealing with a large sub-phrase + if (wordFollowing && subPhraseLength > m_n - 1) + { + // clear up pending ngrams + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals); + contextFactor.clear(); + makePrefix = false; + makeSuffix = true; + collectForPrefix = false; + prefixTerminals = 0; + suffixTerminals = 0; + + // push its suffix + size_t remainingWords = (remainingWords > m_n-1) ? 
m_n-1 : subPhraseLength - (m_n-1); + for(size_t suffixPos = 0; suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) { + const Word &word = prevState->GetSuffix().GetWord(suffixPos); +// cerr << "NT --> : " << word << endl; + contextFactor.push_back(&word); + } + } + // subphrase can be used as suffix and as prefix for the next part + else if (wordFollowing && subPhraseLength == m_n - 1) + { + // clear up pending ngrams + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals); + makePrefix = false; + makeSuffix = true; + collectForPrefix = false; + prefixTerminals = 0; + suffixTerminals = 0; + } + else if (prev_is_NT && prev_subPhraseLength > 1 && subPhraseLength > 1) { + // two NTs in a row: make transition + MakePrefixNgrams(contextFactor, accumulator, 1, m_n-2); + MakeSuffixNgrams(contextFactor, accumulator, 1, m_n-2); + makePrefix = false; + makeSuffix = false; + collectForPrefix = false; + prefixTerminals = 0; + suffixTerminals = 0; + + // remove duplicates + stringstream curr_ngram; + curr_ngram << m_baseName; + curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType); + curr_ngram << ":"; + curr_ngram << (*contextFactor[m_n-1]).GetString(m_factorType); + accumulator->SparseMinusEquals(curr_ngram.str(),1); + } + } + } + prev_is_NT = true; + prev_subPhraseLength = subPhraseLength; + } + } + + if (m_n > 1) { + if (onlyTerminals) { + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals-1); + } + else { + if (makePrefix) + MakePrefixNgrams(contextFactor, accumulator, prefixTerminals); + if (makeSuffix) + MakeSuffixNgrams(contextFactor, accumulator, suffixTerminals); + + // remove duplicates + size_t size = contextFactor.size(); + if (makePrefix && makeSuffix && (size <= m_n)) { + stringstream curr_ngram; + curr_ngram << m_baseName; + for (size_t i = 0; i < size; ++i) { + curr_ngram << (*contextFactor[i]).GetString(m_factorType); + if (i < size-1) + curr_ngram << ":"; + } + accumulator->SparseMinusEquals(curr_ngram.str(), 1); + } + } + } + +// cerr << endl; + return new TargetNgramChartState(cur_hypo, featureId, m_n); +} + +void TargetNgramFeature::MakePrefixNgrams(std::vector &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos, size_t offset) const { + stringstream ngram; + size_t size = contextFactor.size(); + for (size_t k = 0; k < numberOfStartPos; ++k) { + size_t max_end = (size < m_n+k+offset)? 
+    for (size_t end_pos = 1+k+offset; end_pos < max_end; ++end_pos) {
+      ngram << m_baseName;
+      for (size_t i = k+offset; i <= end_pos; ++i) {
+        if (i > k+offset)
+          ngram << ":";
+        string factorZero = (*contextFactor[i]).GetString(0);
+        if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
+          ngram << factorZero;
+        else
+          ngram << (*contextFactor[i]).GetString(m_factorType);
+      }
+//      cerr << "p-ngram: " << ngram.str() << endl;
+      accumulator->SparsePlusEquals(ngram.str(), 1);
+      ngram.str("");
+    }
+  }
+}
+
+void TargetNgramFeature::MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfEndPos, size_t offset) const {
+  stringstream ngram;
+  for (size_t k = 0; k < numberOfEndPos; ++k) {
+    size_t end_pos = contextFactor.size()-1-k-offset;
+    for (int start_pos = (int)end_pos-1; (start_pos >= 0) && (end_pos-start_pos < m_n); --start_pos) {
+      ngram << m_baseName;
+      for (size_t j = start_pos; j <= end_pos; ++j) {
+        string factorZero = (*contextFactor[j]).GetString(0);
+        if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
+          ngram << factorZero;
+        else
+          ngram << (*contextFactor[j]).GetString(m_factorType);
+        if (j < end_pos)
+          ngram << ":";
+      }
+//      cerr << "s-ngram: " << ngram.str() << endl;
+      accumulator->SparsePlusEquals(ngram.str(), 1);
+      ngram.str("");
+    }
+  }
+}
 }
diff --git a/moses/src/TargetNgramFeature.h b/moses/src/TargetNgramFeature.h
index ca87f5dd6..681e7d6aa 100644
--- a/moses/src/TargetNgramFeature.h
+++ b/moses/src/TargetNgramFeature.h
@@ -9,6 +9,10 @@
 #include "FFState.h"
 #include "Word.h"
 
+#include "LM/SingleFactor.h"
+#include "ChartHypothesis.h"
+#include "ChartManager.h"
+
 namespace Moses
 {
 
@@ -22,43 +26,190 @@ class TargetNgramState : public FFState {
   std::vector<Word> m_words;
 };
 
+class TargetNgramChartState : public FFState
+{
+private:
+  Phrase m_contextPrefix, m_contextSuffix;
+
+  size_t m_numTargetTerminals; // This isn't really correct except for the surviving hypothesis
+
+  size_t m_startPos, m_endPos, m_inputSize;
+
+  /** Construct the prefix phrase of up to the specified size
+   * \param ret prefix phrase
+   * \param size maximum size (typically max lm context window)
+   */
+  size_t CalcPrefix(const ChartHypothesis &hypo, const int featureId, Phrase &ret, size_t size) const
+  {
+    const TargetPhrase &target = hypo.GetCurrTargetPhrase();
+    const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+        target.GetAlignmentInfo().GetNonTermIndexMap();
+
+    // loop over the rule that is being applied
+    for (size_t pos = 0; pos < target.GetSize(); ++pos) {
+      const Word &word = target.GetWord(pos);
+
+      // for non-terminals, retrieve the prefix from the underlying hypothesis
+      if (word.IsNonTerminal()) {
+        size_t nonTermInd = nonTermIndexMap[pos];
+        const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
+        size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcPrefix(*prevHypo, featureId, ret, size);
+//        Phrase phrase = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->GetPrefix();
+//        size = phrase.GetSize();
+      }
+      // for terminal words, add the word itself
+      else {
+        ret.AddWord(word);
+        size--;
+      }
+
+      // finish when maximum length reached
+      if (size == 0)
+        break;
+    }
+
+    return size;
+  }
+
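+  // illustrative example (assuming order 3): for a hypothesis covering
+  // "the quick brown fox", CalcPrefix yields "the quick" and CalcSuffix
+  // "brown fox"; only these boundary words can take part in new n-grams
+  // when this constituent is embedded in a larger rule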
+  /** Construct the suffix phrase of up to the specified size;
+   * will always be called after the construction of the prefix phrase
+   * \param ret suffix phrase
+   * \param size maximum size of suffix
+   */
+  size_t CalcSuffix(const ChartHypothesis &hypo, int featureId, Phrase &ret, size_t size) const
+  {
+    size_t prefixSize = m_contextPrefix.GetSize();
+    assert(prefixSize <= m_numTargetTerminals);
+
+    // special handling for small hypotheses:
+    // does the prefix match the entire hypothesis string? -> just copy the prefix
+    if (prefixSize == m_numTargetTerminals) {
+      size_t maxCount = std::min(prefixSize, size);
+      size_t pos = prefixSize - 1;
+
+      for (size_t ind = 0; ind < maxCount; ++ind) {
+        const Word &word = m_contextPrefix.GetWord(pos);
+        ret.PrependWord(word);
+        --pos;
+      }
+
+      size -= maxCount;
+      return size;
+    }
+    // construct suffix analogously to the prefix
+    else {
+      const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
+      const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+          targetPhrase.GetAlignmentInfo().GetNonTermIndexMap();
+      for (int pos = (int) targetPhrase.GetSize() - 1; pos >= 0; --pos) {
+        const Word &word = targetPhrase.GetWord(pos);
+
+        if (word.IsNonTerminal()) {
+          size_t nonTermInd = nonTermIndexMap[pos];
+          const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
+          size = static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId))->CalcSuffix(*prevHypo, featureId, ret, size);
+        }
+        else {
+          ret.PrependWord(word);
+          size--;
+        }
+
+        if (size == 0)
+          break;
+      }
+
+      return size;
+    }
+  }
+
+public:
+  TargetNgramChartState(const ChartHypothesis &hypo, int featureId, size_t order)
+      :m_contextPrefix(order - 1),
+       m_contextSuffix(order - 1)
+  {
+    m_numTargetTerminals = hypo.GetCurrTargetPhrase().GetNumTerminals();
+    const WordsRange range = hypo.GetCurrSourceRange();
+    m_startPos = range.GetStartPos();
+    m_endPos = range.GetEndPos();
+    m_inputSize = hypo.GetManager().GetSource().GetSize();
+
+    const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
+    for (std::vector<const ChartHypothesis*>::const_iterator i = prevHypos.begin(); i != prevHypos.end(); ++i) {
+      // keep count of words (= length of generated string)
+      m_numTargetTerminals += static_cast<const TargetNgramChartState*>((*i)->GetFFState(featureId))->GetNumTargetTerminals();
+    }
+
+    CalcPrefix(hypo, featureId, m_contextPrefix, order - 1);
+    CalcSuffix(hypo, featureId, m_contextSuffix, order - 1);
+  }
+
+  size_t GetNumTargetTerminals() const {
+    return m_numTargetTerminals;
+  }
+
+  const Phrase &GetPrefix() const {
+    return m_contextPrefix;
+  }
+  const Phrase &GetSuffix() const {
+    return m_contextSuffix;
+  }
+
+  int Compare(const FFState& o) const {
+    const TargetNgramChartState &other =
+        static_cast<const TargetNgramChartState&>(o);
+
+    // prefix
+    if (m_startPos > 0) // not for "<s> ..."
+    {
+      int ret = GetPrefix().Compare(other.GetPrefix());
+      if (ret != 0)
+        return ret;
+    }
+
+    if (m_endPos < m_inputSize - 1) // not for "... </s>"
+    {
+      int ret = GetSuffix().Compare(other.GetSuffix());
+      if (ret != 0)
+        return ret;
+    }
+    return 0;
+  }
+};
+
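+// Feature keys are the producer name plus "_", with the n-gram's words
+// joined by ":"; e.g. (illustrative) a scored trigram "the quick fox"
+// becomes "dlm_the:quick:fox" for the default "dlm" producer name and
+// surface factor 0.
+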
 /** Sets the features of observed ngrams.
  */
 class TargetNgramFeature : public StatefulFeatureFunction {
 public:
   TargetNgramFeature(FactorType factorType = 0, size_t n = 3, bool lower_ngrams = true):
-    StatefulFeatureFunction("dlmn", ScoreProducer::unlimited),
+    StatefulFeatureFunction("dlm", ScoreProducer::unlimited),
     m_factorType(factorType),
     m_n(n),
     m_lower_ngrams(lower_ngrams),
     m_sparseProducerWeight(1) {
     FactorCollection& factorCollection = FactorCollection::Instance();
-    const Factor* bosFactor =
-       factorCollection.AddFactor(Output,m_factorType,BOS_);
+    const Factor* bosFactor = factorCollection.AddFactor(Output,m_factorType,BOS_);
     m_bos.SetFactor(m_factorType,bosFactor);
+    m_baseName = GetScoreProducerDescription();
+    m_baseName.append("_");
   }
-
   bool Load(const std::string &filePath);
 
   std::string GetScoreProducerWeightShortName(unsigned) const;
   size_t GetNumInputScores() const;
 
   void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
-  float GetSparseProducerWeight() { return m_sparseProducerWeight; }
+  float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
 
   virtual const FFState* EmptyHypothesisState(const InputType &input) const;
 
   virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
-                                  int /* featureID */,
-                                  ScoreComponentCollection* ) const
-  {
-    abort();
-  }
+  virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
+                                 ScoreComponentCollection* accumulator) const;
+
 private:
   FactorType m_factorType;
   Word m_bos;
@@ -69,7 +220,13 @@ private:
   // additional weight that all sparse weights are scaled with
   float m_sparseProducerWeight;
 
-  void appendNgram(const Word& word, bool& skip, std::string& ngram) const;
+  std::string m_baseName;
+
+  void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const;
+  void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
+                        size_t numberOfStartPos = 1, size_t offset = 0) const;
+  void MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
+                        size_t numberOfEndPos = 1, size_t offset = 0) const;
 };
 
 }
diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp
index bc0a8e120..1ff5df496 100644
--- a/moses/src/Word.cpp
+++ b/moses/src/Word.cpp
@@ -85,6 +85,15 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlank) const
   return strme.str();
 }
 
+std::string Word::GetString(FactorType factorType) const
+{
+  const Factor *factor = m_factorArray[factorType];
+  if (factor != NULL)
+    return factor->GetString();
+  else
+    return "";  // empty string for an unset factor; constructing std::string from NULL is undefined
+}
+
 void Word::CreateFromString(FactorDirection direction
                             , const std::vector<FactorType> &factorOrder
                             , const std::string &str
@@ -94,7 +103,8 @@ void Word::CreateFromString(FactorDirection direction
   vector<string> wordVec;
   Tokenize(wordVec, str, "|");
-  CHECK(wordVec.size() == factorOrder.size());
+  if (!isNonTerminal)
+    assert(wordVec.size() == factorOrder.size());
 
   const Factor *factor;
   for (size_t ind = 0; ind < wordVec.size(); ++ind) {
diff --git a/moses/src/Word.h b/moses/src/Word.h
index 4818abb60..7dd395030 100644
--- a/moses/src/Word.h
+++ b/moses/src/Word.h
@@ -101,6 +101,7 @@ public:
    * these debugging functions.
    */
   std::string GetString(const std::vector<FactorType> factorType,bool endWithBlank) const;
+  std::string GetString(FactorType factorType) const;
   TO_STRING();
 
   //! transitive comparison of Word objects