Merge from miramerge.

Reverted ChartHypothesis as it breaks chart-decoding.
2024-10-27 03:49:57 +03:00 · 2012-01-20 15:35:55 +00:00 · 2012-01-20 15:35:55 +00:00 · 1e10bb7ef7
commit 1e10bb7ef7
parent ced24a881d
25 changed files with 801 additions and 245 deletions
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@ -184,9 +184,9 @@ namespace Mira {
 	  m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
  }

-  void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
+/*  void MosesDecoder::loadReferenceSentences(const vector<vector<string> >& refs) {
  	m_bleuScoreFeature->LoadReferences(refs);
-  }
+  }*/

  void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
  	m_bleuScoreFeature->PrintHistory(out);
@ -200,9 +200,11 @@ namespace Mira {
  	return m_bleuScoreFeature->GetReferenceLength(ref_id);
  }

-  void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+  void MosesDecoder::setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+  		bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
 		  float scaleByX, float historySmoothing, size_t scheme, float relax_BP) {
-	  m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength, scaleByTargetLength,
+	  m_bleuScoreFeature->SetBleuParameters(scaleByInputLength, scaleByRefLength, scaleByAvgLength,
+	  		scaleByTargetLengthLinear, scaleByTargetLengthTrend,
 			  scaleByX, historySmoothing, scheme, relax_BP);
  }
 } 
--- a/mira/Decoder.h
+++ b/mira/Decoder.h
@ -64,11 +64,12 @@ class MosesDecoder {
    size_t getCurrentInputLength();
    void updateHistory(const std::vector<const Moses::Word*>& words);
    void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
-    void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
+//    void loadReferenceSentences(const std::vector<std::vector<std::string> >& refs);
    void printBleuFeatureHistory(std::ostream& out);
    void printReferenceLength(const std::vector<size_t>& ref_ids);
    size_t getReferenceLength(size_t ref_id);
-    void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+    void setBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+    		bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
  		  float scaleByX, float historySmoothing, size_t scheme, float relax_BP);
    Moses::ScoreComponentCollection getWeights();
    void setWeights(const Moses::ScoreComponentCollection& weights);
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@ -82,7 +82,8 @@ int main(int argc, char** argv) {
 	float historySmoothing;
 	bool scaleByInputLength;
 	bool scaleByReferenceLength;
-	bool scaleByTargetLength;
+	bool scaleByTargetLengthLinear;
+	bool scaleByTargetLengthTrend;
 	bool scaleByAvgLength;
 	float scaleByX;
 	float slack;
@ -119,6 +120,8 @@ int main(int argc, char** argv) {
 	float max_length_dev_hypos;
 	float max_length_dev_reference;
 	float relax_BP;
+	bool stabiliseLength;
+	bool delayUpdates;
 	po::options_description desc("Allowed options");
 	desc.add_options()
 		("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
@ -133,6 +136,7 @@ int main(int argc, char** argv) {
 		("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
 		("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
 		("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
+		("delay-updates", po::value<bool>(&delayUpdates)->default_value(false), "Delay all updates until the end of an epoch")
 		("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
 		("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
 		("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
@ -164,7 +168,8 @@ int main(int argc, char** argv) {
 		("relax-BP", po::value<float>(&relax_BP)->default_value(1), "Relax the BP by setting this value between 0 and 1")
 		("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by (a history of) the input length")
 		("scale-by-reference-length", po::value<bool>(&scaleByReferenceLength)->default_value(false), "Scale BLEU by (a history of) the reference length")
-		("scale-by-target-length", po::value<bool>(&scaleByTargetLength)->default_value(false), "Scale BLEU by (a history of) the target length")
+		("scale-by-target-length-linear", po::value<bool>(&scaleByTargetLengthLinear)->default_value(false), "Scale BLEU by (a history of) the target length (linear future estimate)")
+		("scale-by-target-length-trend", po::value<bool>(&scaleByTargetLengthTrend)->default_value(false), "Scale BLEU by (a history of) the target length (trend-based future estimate)")
 		("scale-by-avg-length", po::value<bool>(&scaleByAvgLength)->default_value(false), "Scale BLEU by (a history of) the average of input and reference length")
 		("scale-by-x", po::value<float>(&scaleByX)->default_value(1), "Scale the BLEU score by value x")
 		("scale-margin", po::value<size_t>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
@ -174,6 +179,7 @@ int main(int argc, char** argv) {
 		("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
 		("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
 		("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
+		("stabilise-length", po::value<bool>(&stabiliseLength)->default_value(false), "Stabilise word penalty when length ratio >= 1")
 		("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
 		("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
 		("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
@ -268,11 +274,7 @@ int main(int argc, char** argv) {
 		}
 	}

-	if (scaleByReferenceLength)
-		scaleByInputLength = false;
-	if (scaleByTargetLength)
-		scaleByInputLength = false;
-	if (scaleByAvgLength)
+	if (scaleByReferenceLength || scaleByTargetLengthLinear || scaleByTargetLengthTrend || scaleByAvgLength)
 		scaleByInputLength = false;

 	// initialise Moses
@ -285,7 +287,8 @@ int main(int argc, char** argv) {
 	vector<string> decoder_params;
 	boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
 	MosesDecoder* decoder = new MosesDecoder(mosesConfigFile, verbosity, decoder_params.size(), decoder_params);
-	decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength, scaleByTargetLength,
+	decoder->setBleuParameters(scaleByInputLength, scaleByReferenceLength, scaleByAvgLength,
+			scaleByTargetLengthLinear, scaleByTargetLengthTrend,
 			scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP);
 	if (normaliseWeights) {
 		ScoreComponentCollection startWeights = decoder->getWeights();
@ -409,6 +412,12 @@ int main(int argc, char** argv) {
 	ScoreComponentCollection mixedAverageWeightsPrevious;
 	ScoreComponentCollection mixedAverageWeightsBeforePrevious;

+	// when length ratio >= 1, set this to true
+	bool fixLength = false;
+
+	// for accumulating delayed updates
+	ScoreComponentCollection delayedWeightUpdates;
+
 	bool stop = false;
 //	int sumStillViolatedConstraints;
 	float *sendbuf, *recvbuf;
@ -427,6 +436,12 @@ int main(int argc, char** argv) {
 		// number of weight dumps this epoch
 		size_t weightEpochDump = 0;

+		// sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch
+		size_t dev_hypothesis_length = 0;
+		size_t dev_reference_length = 0;
+
+		delayedWeightUpdates.ZeroAll();
+
 		size_t shardPosition = 0;
 		vector<size_t>::const_iterator sid = shard.begin();
 		while (sid != shard.end()) {
@ -460,7 +475,7 @@ int main(int argc, char** argv) {
 			for (size_t batchPosition = 0; batchPosition < batchSize && sid
 			    != shard.end(); ++batchPosition) {
 				string& input = inputSentences[*sid];
-				const vector<string>& refs = referenceSentences[*sid];
+//				const vector<string>& refs = referenceSentences[*sid];
 				cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"" << input << "\"" << " (batch pos " << batchPosition << ")" << endl;

 				vector<ScoreComponentCollection> newFeatureValues;
@ -474,7 +489,7 @@ int main(int argc, char** argv) {
 					featureValuesFear.push_back(newFeatureValues);
 					bleuScoresHope.push_back(newBleuScores);
 					bleuScoresFear.push_back(newBleuScores);
-					if (historyOf1best) {
+					if (historyOf1best || stabiliseLength) {
 						dummyFeatureValues.push_back(newFeatureValues);
 						dummyBleuScores.push_back(newBleuScores);
 					}
@ -493,13 +508,16 @@ int main(int argc, char** argv) {
 					cerr << ", l-ratio hope: " << hope_length_ratio << endl;

 					vector<const Word*> bestModel;
-					if (historyOf1best) {
+					if (historyOf1best || stabiliseLength) {
 						// MODEL (for updating the history only, using dummy vectors)
-						cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history)" << endl;
+						cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl;
 						bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
 								dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
 								distinctNbest, rank, epoch);
 						decoder->cleanup();
+						cerr << endl;
+						dev_hypothesis_length += bestModel.size();
+						dev_reference_length += reference_length;
 					}

 					// FEAR
@ -576,6 +594,10 @@ int main(int argc, char** argv) {
 					oneBests.push_back(bestModel);
 					float model_length_ratio = (float)bestModel.size()/reference_length;
 					cerr << ", l-ratio model: " << model_length_ratio << endl;
+					if (stabiliseLength) {
+						dev_hypothesis_length += bestModel.size();
+						dev_reference_length += reference_length;
+					}

 					// FEAR
 					cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl;
@ -622,6 +644,19 @@ int main(int argc, char** argv) {
 				    break;
 				  }

+				// set word penalty to 0 before optimising (if 'stabilise-length' is active)
+				if (fixLength) {
+					iter = featureFunctions.begin();
+					for (; iter != featureFunctions.end(); ++iter) {
+						if ((*iter)->GetScoreProducerWeightShortName() == "w") {
+							ignoreWPFeature(featureValues, (*iter));
+							ignoreWPFeature(featureValuesHope, (*iter));
+							ignoreWPFeature(featureValuesFear, (*iter));
+							break;
+						}
+					}
+				}
+
 				// take logs of feature values
 				if (logFeatureValues) {
 					takeLogs(featureValuesHope, baseOfLog);
@ -654,24 +689,28 @@ int main(int argc, char** argv) {
 				// Run optimiser on batch:
 				VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
 				size_t update_status;
+				ScoreComponentCollection weightUpdate;
 				if (perceptron_update) {
 					vector<vector<float> > dummy1;
-					update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+					update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
 							featureValuesHope, featureValuesFear, dummy1, dummy1, learning_rate, rank, epoch);
 				}
 				else if (hope_fear) {
-					update_status = optimiser->updateWeightsHopeFear(mosesWeights,
+					update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
 							featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, learning_rate, rank, epoch);
 				}
 				else {
 					// model_hope_fear
-					update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights,
+					update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, weightUpdate,
 							featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, learning_rate, rank, epoch);
 				}

 //			sumStillViolatedConstraints += update_status;

 				if (update_status == 0) {	 // if weights were updated
+					// apply weight update
+					mosesWeights.PlusEquals(weightUpdate);
+
 					if (normaliseWeights) {
 						mosesWeights.L1Normalise();
 					}
@ -690,6 +729,9 @@ int main(int argc, char** argv) {
 						mosesWeights = averageWeights;
 					}

+					if (delayUpdates)
+						delayedWeightUpdates.PlusEquals(weightUpdate);
+					else
 						// set new Moses weights
 						decoder->setWeights(mosesWeights);
 				}
@ -802,8 +844,25 @@ int main(int argc, char** argv) {
 			    }
 			  }
 			}// end dumping
+
 		} // end of shard loop, end of this epoch

+		if (delayUpdates) {
+			// apply all updates from this epoch to the weight vector
+			ScoreComponentCollection mosesWeights = decoder->getWeights();
+			mosesWeights.PlusEquals(delayedWeightUpdates);
+			decoder->setWeights(mosesWeights);
+			cerr << "Rank " << rank << ", epoch " << epoch << ", delayed update, new moses weights: " << mosesWeights << endl;
+		}
+
+		if (stabiliseLength && !fixLength) {
+			float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
+			if (lengthRatio >= 1) {
+				cerr << "Rank " << rank << ", epoch " << epoch << ", length ratio >= 1, fixing word penalty. " << endl;
+				fixLength = 1;
+			}
+		}
+
 		if (verbosity > 0) {
 			cerr << "Bleu feature history after epoch " <<  epoch << endl;
 			decoder->printBleuFeatureHistory(cerr);
@ -840,28 +899,19 @@ int main(int argc, char** argv) {
 				if (rank == 0 && (epoch >= 2)) {
 					ScoreComponentCollection firstDiff(mixedAverageWeights);
 					firstDiff.MinusEquals(mixedAverageWeightsPrevious);
-					VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << endl);
+					VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << 
+						" (max: " << firstDiff.GetLInfNorm() << ")" << endl);
 					ScoreComponentCollection secondDiff(mixedAverageWeights);
 					secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
-					VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << endl << endl);
+					VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << 
+						" (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl);

 					// check whether stopping criterion has been reached
 					// (both difference vectors must have all weight changes smaller than min_weight_change)
-					FVector changes1 = firstDiff.GetScoresVector();
-					FVector changes2 = secondDiff.GetScoresVector();
-					FVector::const_iterator iterator1 = changes1.cbegin();
-					FVector::const_iterator iterator2 = changes2.cbegin();
-					while (iterator1 != changes1.cend()) {
-						if (abs((*iterator1).second) >= min_weight_change || abs(
-								(*iterator2).second) >= min_weight_change) {
+					if (firstDiff.GetLInfNorm() >= min_weight_change)
+					  reached = false;
+					if (secondDiff.GetLInfNorm() >= min_weight_change)
 					  reached = false;
-							break;
-						}
-
-						++iterator1;
-						++iterator2;
-					}
-
 					if (reached) {
 						// stop MIRA
 						stop = true;
@ -991,16 +1041,20 @@ void printFeatureValues(vector<vector<ScoreComponentCollection> > &featureValues
 }

 void ignoreCoreFeatures(vector<vector<ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap) {
-	for (size_t i = 0; i < featureValues.size(); ++i) {
+	for (size_t i = 0; i < featureValues.size(); ++i)
 		for (size_t j = 0; j < featureValues[i].size(); ++j) {
 			// set all core features to 0
 			StrFloatMap::iterator p;
 			for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p)
-			{
 				featureValues[i][j].Assign(p->first, 0);
 		}
-		}
-	}
+}
+
+void ignoreWPFeature(vector<vector<ScoreComponentCollection> > &featureValues, const ScoreProducer* sp) { // overwrite, in place, the score of producer sp in every collection of featureValues
+	for (size_t i = 0; i < featureValues.size(); ++i)
+		for (size_t j = 0; j < featureValues[i].size(); ++j)
+			// assign 0 to the scores of sp (callers pass the word penalty producer) in this entry
+			featureValues[i][j].Assign(sp, 0);
 }

 void takeLogs(vector<vector<ScoreComponentCollection> > &featureValues, size_t base) {
--- a/mira/Main.h
+++ b/mira/Main.h
@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 #include "ScoreComponentCollection.h"
 #include "Word.h"
+#include "ScoreProducer.h"

 typedef std::map<const std::string, float> StrFloatMap;
 typedef std::pair<const std::string, float> StrFloatPair;
@ -46,6 +47,7 @@ bool loadWeights(const std::string& filename, StrFloatMap& coreWeightMap);
 bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size);
 void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues);
 void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, StrFloatMap &coreWeightMap);
+void ignoreWPFeature(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, const Moses::ScoreProducer* sp);
 void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
 void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);

--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@ -7,7 +7,9 @@ using namespace std;

 namespace Mira {

-size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeights(
+		ScoreComponentCollection& currWeights,
+		ScoreComponentCollection& weightUpdate,
    const vector<vector<ScoreComponentCollection> >& featureValues,
    const vector<vector<float> >& losses,
    const vector<vector<float> >& bleuScores,
@ -142,9 +144,7 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	}

 	cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
-
-	// apply update to weight vector
-	currWeights.PlusEquals(summedUpdate);
+	weightUpdate.PlusEquals(summedUpdate);

 	// Sanity check: are there still violated constraints after optimisation?
 /*	int violatedConstraintsAfter = 0;
@ -164,7 +164,9 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
 	return 0;
 }

-size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+size_t MiraOptimiser::updateWeightsHopeFear(
+		Moses::ScoreComponentCollection& currWeights,
+		Moses::ScoreComponentCollection& weightUpdate,
 		const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
 		const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
 		const std::vector<std::vector<float> >& bleuScoresHope,
@ -299,9 +301,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& cur
 	}

 	cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
-
-	// apply update to weight vector
-	currWeights.PlusEquals(summedUpdate);
+	weightUpdate.PlusEquals(summedUpdate);

 	// Sanity check: are there still violated constraints after optimisation?
 /*	int violatedConstraintsAfter = 0;
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@ -30,7 +30,9 @@ namespace Mira {
    public:
      Optimiser() {}

-      virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+      virtual size_t updateWeightsHopeFear(
+      		Moses::ScoreComponentCollection& currWeights,
+      		Moses::ScoreComponentCollection& weightUpdate,
 				  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
 				  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
 				  const std::vector<std::vector<float> >& bleuScoresHope,
@ -42,7 +44,9 @@ namespace Mira {
 
  class Perceptron : public Optimiser {
    public:
-			virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+			virtual size_t updateWeightsHopeFear(
+					Moses::ScoreComponentCollection& currWeights,
+					Moses::ScoreComponentCollection& weightUpdate,
 					const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
 					const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
 					const std::vector<std::vector<float> >& bleuScoresHope,
@ -66,6 +70,7 @@ namespace Mira {
 		  m_margin_slack(margin_slack) { }
   
 	  size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
+	  								Moses::ScoreComponentCollection& weightUpdate,
      						  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
      						  const std::vector<std::vector<float> >& losses,
      						  const std::vector<std::vector<float> >& bleuScores,
@ -75,6 +80,7 @@ namespace Mira {
      						  size_t rank,
      						  size_t epoch);
     virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
+    		 	 	 	 	 	 	Moses::ScoreComponentCollection& weightUpdate,
      						  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
      						  const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
      						  const std::vector<std::vector<float> >& bleuScoresHope,
--- a/mira/Perceptron.cpp
+++ b/mira/Perceptron.cpp
@ -24,7 +24,9 @@ using namespace std;

 namespace Mira {

-size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
+size_t Perceptron::updateWeightsHopeFear(
+		ScoreComponentCollection& currWeights,
+		ScoreComponentCollection& weightUpdate,
 		const vector< vector<ScoreComponentCollection> >& featureValuesHope,
 		const vector< vector<ScoreComponentCollection> >& featureValuesFear,
 		const vector< vector<float> >& dummy1,
@ -39,7 +41,7 @@ size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
 	featureValueDiff.MinusEquals(featureValuesFear[0][0]);
 	cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
 	featureValueDiff.MultiplyEquals(perceptron_learning_rate);
-	currWeights.PlusEquals(featureValueDiff);
+	weightUpdate.PlusEquals(featureValueDiff);
 	cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
 	return 0;
 }
--- a/mira/training-expt.perl
+++ b/mira/training-expt.perl
@ -72,6 +72,9 @@ my $moses_ini_file = &param_required("train.moses-ini-file");
 my $input_file = &param_required("train.input-file");
 &check_exists ("train input file", $input_file);
 my $reference_files = &param_required("train.reference-files");
+for my $ref (glob $reference_files . "*") {
+    &check_exists ("ref files", $ref);
+}
 my $trainer_exe = &param_required("train.trainer");
 &check_exists("Training executable", $trainer_exe);
 #my $weights_file = &param_required("train.weights-file");
@ -94,20 +97,21 @@ my $burn_in_reference_files = &param("train.burn-in-reference-files");
 my $skipTrain = &param("train.skip", 0);

 #devtest configuration
-my ($devtest_input_file, $devtest_reference_file,$devtest_ini_file,$bleu_script,$use_moses);
+my ($devtest_input_file, $devtest_reference_files,$devtest_ini_file,$bleu_script,$use_moses);
 my $test_exe = &param("devtest.moses");
 &check_exists("test executable", $test_exe);
 $bleu_script  = &param_required("devtest.bleu");
 &check_exists("multi-bleu script", $bleu_script);
 $devtest_input_file = &param_required("devtest.input-file");
-$devtest_reference_file = &param_required("devtest.reference-file");
 &check_exists ("devtest input file", $devtest_input_file);
-
-for my $ref (glob $devtest_reference_file . "*") {
+$devtest_reference_files = &param_required("devtest.reference-file");
+for my $ref (glob $devtest_reference_files . "*") {
    &check_exists ("devtest ref file", $ref);
 }
 $devtest_ini_file = &param_required("devtest.moses-ini-file");
 &check_exists ("devtest ini file", $devtest_ini_file);
+
+
 my $weight_file_stem = "$name-weights";
 my $extra_memory_devtest = &param("devtest.extra-memory",0);
 my $skip_devtest = &param("devtest.skip-devtest",0);
@ -174,8 +178,9 @@ my @refs;
 if (ref($reference_files) eq 'ARRAY') {
    @refs = @$reference_files;
 } else {
-    @refs = glob $reference_files;
+    @refs = glob $reference_files . "*"
 }
+my $arr_refs = \@refs;

 if (!$skipTrain) {
 #write the script
@ -198,7 +203,6 @@ print TRAIN "-f $moses_ini_file \\\n";
 print TRAIN "-i $input_file \\\n";

 for my $ref (@refs) {
-    &check_exists("train ref file",  $ref);
    print TRAIN "-r $ref ";
 }
 print TRAIN "\\\n";
@ -206,15 +210,15 @@ print TRAIN "\\\n";
 if ($burn_in) {
    print TRAIN "--burn-in 1 \\\n";
    print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
-    my @refs;
+    my @burnin_refs;
    if (ref($burn_in_reference_files) eq 'ARRAY') {
-	@refs = @$burn_in_reference_files;
+	@burnin_refs = @$burn_in_reference_files;
    } else {
-	@refs = glob $burn_in_reference_files;
+	@burnin_refs = glob $burn_in_reference_files . "*";
    }
-    for my $ref (@refs) {
-	&check_exists("burn-in ref file",  $ref);
-	print TRAIN "--burn-in-reference-files $ref ";
+    for my $burnin_ref (@burnin_refs) {
+	&check_exists("burn-in ref file",  $burnin_ref);
+	print TRAIN "--burn-in-reference-files $burnin_ref ";
    }
    print TRAIN "\\\n";
 }
@ -317,10 +321,10 @@ while(1) {
    my $suffix = "";
    print "weight file exists? ".(-e $new_weight_file)."\n";
    if (!$skip_devtest) {
-	createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_file, $skip_submit_test);
+	createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test);
    }
    if (!$skip_dev) {
-	createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $refs[0], $skip_submit_test);
+	createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $reference_files, $skip_submit_test);
    }
 }

--- a/moses-chart-cmd/src/IOWrapper.cpp
+++ b/moses-chart-cmd/src/IOWrapper.cpp
@ -47,6 +47,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "ChartHypothesis.h"
 #include "DotChart.h"

+#include <boost/algorithm/string.hpp>
+#include "FeatureVector.h"
+

 using namespace std;
 using namespace Moses;
@ -345,7 +348,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
    // print the surface factor of the translation
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
-    out << " |||";
+    out << " ||| ";

    // print the scores in a hardwired order
    // before each model type, the corresponding command-line-like name must be emitted
@ -362,18 +365,15 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
      }
    }

-
    std::string lastName = "";

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
-
      for( size_t i=0; i<pds.size(); i++ ) {
      	size_t pd_numinputscore = pds[i]->GetNumInputScores();
      	vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
      	for (size_t j = 0; j<scores.size(); ++j){
-
      		if (labeledOutput && (i == 0) ){
      			if ((j == 0) || (j == pd_numinputscore)){
      				lastName =  pds[i]->GetScoreProducerWeightShortName(j);
@ -393,12 +393,10 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
-
      for( size_t i=0; i<gds.size(); i++ ) {
      	size_t pd_numinputscore = gds[i]->GetNumInputScores();
      	vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
      	for (size_t j = 0; j<scores.size(); ++j){
-
      		if (labeledOutput && (i == 0) ){
      			if ((j == 0) || (j == pd_numinputscore)){
      				lastName =  gds[i]->GetScoreProducerWeightShortName(j);
@ -410,9 +408,21 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
      }
    }

+    // output sparse features: only feature functions with an unlimited number of score components
+    lastName = "";
+    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
+    for( size_t i=0; i<sff.size(); i++ )
+    	if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
+    		OutputSparseFeatureScores( out, path, sff[i], lastName );
+    // same for the stateless feature functions; the check must index slf, whose size can differ from sff
+    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
+    for( size_t i=0; i<slf.size(); i++ )
+    	if (slf[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
+    		OutputSparseFeatureScores( out, path, slf[i], lastName );
+

    // total
-    out << " |||" << path.GetTotalScore();
+    out << " ||| " << path.GetTotalScore();

    /*
    if (includeAlignment) {
@ -443,6 +453,32 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
  m_nBestOutputCollector->Write(translationId, out.str());
 }

+void IOWrapper::OutputSparseFeatureScores( std::ostream& out, const ChartTrellisPath &path, const FeatureFunction *ff, std::string &lastName )
+{ // Append the scores that feature function ff contributed to this path to out (called per n-best entry).
+  const StaticData &staticData = StaticData::Instance();
+  bool labeledOutput = staticData.IsLabeledNBestList();
+  const FVector scores = path.GetScoreBreakdown().GetVectorForProducer( ff );
+  // NOTE(review): lastName is accepted but never read or written in this body.
+  // Aggregate mode (sparse feature reporting off): emit one number, the inner product of ff's scores with all weights.
+  if (! ff->GetSparseFeatureReporting()) {
+  	const FVector &weights = staticData.GetAllWeights().GetScoresVector();
+  	if (labeledOutput && !boost::contains(ff->GetScoreProducerDescription(), ":")) // label only if the description contains no ':'
+  		out << " " << ff->GetScoreProducerWeightShortName() << ":";
+    out << " " << scores.inner_product(weights);
+  }
+
+  // Per-feature mode: emit each entry of the score vector, prefixed by "<name>:" when labeled output is on.
+  else {
+  	for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
+  		if (i->second != 0) { // do not report zero-valued features
+  			if (labeledOutput)
+  				out << " " << i->first << ":";
+        out << " " << i->second;
+      }
+    }
+  }
+}
+
 void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
 {
  stream.setf(std::ios::fixed);
--- a/moses-chart-cmd/src/IOWrapper.h
+++ b/moses-chart-cmd/src/IOWrapper.h
@ -44,6 +44,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "OutputCollector.h"
 #include "ChartHypothesis.h"

+#include "ChartTrellisPath.h"
+
 namespace Moses
 {
 class FactorCollection;
@ -82,6 +84,7 @@ public:
  void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
  void OutputBestHypo(const std::vector<const Moses::Factor*>&  mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors);
  void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId);
+  void OutputSparseFeatureScores(std::ostream& out, const Moses::ChartTrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName);
  void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, long translationId);
  void Backtrack(const Moses::ChartHypothesis *hypo);

--- a/moses-chart-cmd/src/Main.cpp
+++ b/moses-chart-cmd/src/Main.cpp
@ -165,18 +165,25 @@ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source
 }
 static void PrintFeatureWeight(const FeatureFunction* ff)
 {
-
  size_t numScoreComps = ff->GetNumScoreComponents();
  if (numScoreComps != ScoreProducer::unlimited) {
    vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
-    for (size_t i = 0; i < numScoreComps; ++i) {
+    for (size_t i = 0; i < numScoreComps; ++i) 
      cout << ff->GetScoreProducerDescription() <<  " "
           << ff->GetScoreProducerWeightShortName() << " "
           << values[i] << endl;
  }
-  } else {
+}
+
+static void PrintSparseFeatureWeight(const FeatureFunction* ff) // prints one weight line to cout, only for producers whose component count is ScoreProducer::unlimited
+{
+  if (ff->GetNumScoreComponents() == ScoreProducer::unlimited) {
+    if (ff->GetSparseProducerWeight() == 1) // a producer weight of exactly 1 is reported as the literal word "sparse"
       cout << ff->GetScoreProducerDescription() << " " <<
  	ff->GetScoreProducerWeightShortName() << " sparse" << endl;
+    else // any other producer weight is printed numerically
+      cout << ff->GetScoreProducerDescription() << " " <<
+	ff->GetScoreProducerWeightShortName() << " " << ff->GetSparseProducerWeight() << endl;
   }
 }

@ -201,6 +208,9 @@ static void ShowWeights()
  for (size_t i = 0; i < slf.size(); ++i) {
    PrintFeatureWeight(slf[i]);
  }
+  for (size_t i = 0; i < sff.size(); ++i) {
+    PrintSparseFeatureWeight(sff[i]);
+  }
 }


--- a/moses-cmd/src/Main.cpp
+++ b/moses-cmd/src/Main.cpp
@ -287,21 +287,27 @@ private:

 static void PrintFeatureWeight(const FeatureFunction* ff)
 {
-
  size_t numScoreComps = ff->GetNumScoreComponents();
  if (numScoreComps != ScoreProducer::unlimited) {
    vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
-    for (size_t i = 0; i < numScoreComps; ++i) {
+    for (size_t i = 0; i < numScoreComps; ++i) 
      cout << ff->GetScoreProducerDescription() <<  " "
           << ff->GetScoreProducerWeightShortName() << " "
           << values[i] << endl;
  }
-  } else {
-    cout << ff->GetScoreProducerDescription() << " " <<
-         ff->GetScoreProducerWeightShortName() << " sparse" <<  endl;
-  }
 }

+/**
+ * Print the weight line of a sparse feature function to cout.
+ * Producers with a fixed number of score components are ignored. For the others the line is
+ * "<description> <short name> <weight>", where a weight of exactly 1 is shown as "sparse".
+ */
+static void PrintSparseFeatureWeight(const FeatureFunction* ff)
+{
+  // only producers with an unlimited number of components are reported here
+  if (ff->GetNumScoreComponents() != ScoreProducer::unlimited)
+    return;
+
+  const float producerWeight = ff->GetSparseProducerWeight();
+
+  // both output variants share the same "<description> <short name> " prefix
+  cout << ff->GetScoreProducerDescription() << " "
+       << ff->GetScoreProducerWeightShortName() << " ";
+  if (producerWeight == 1)
+    cout << "sparse";
+  else
+    cout << producerWeight;
+  cout << endl;
+}

 static void ShowWeights()
 {
@ -324,6 +330,9 @@ static void ShowWeights()
  for (size_t i = 0; i < gds.size(); ++i) {
    PrintFeatureWeight(gds[i]);
  }
+  for (size_t i = 0; i < sff.size(); ++i) {
+    PrintSparseFeatureWeight(sff[i]);
+  }
 }

 /** main function of the command line version of the decoder **/
--- a/moses/src/BleuScoreFeature.cpp
+++ b/moses/src/BleuScoreFeature.cpp
@ -81,11 +81,13 @@ void BleuScoreFeature::PrintHistory(std::ostream& out) const {
  }
 }

-void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+void BleuScoreFeature::SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+		bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
 		float scaleByX, float historySmoothing, size_t scheme, float relaxBP) {
 	m_scale_by_input_length = scaleByInputLength;
 	m_scale_by_ref_length = scaleByRefLength;
-	m_scale_by_target_length = scaleByTargetLength;
+	m_scale_by_target_length_linear = scaleByTargetLengthLinear;
+	m_scale_by_target_length_trend = scaleByTargetLengthTrend;
 	m_scale_by_avg_length = scaleByAvgLength;
 	m_scale_by_x = scaleByX;
 	m_historySmoothing = historySmoothing;
@ -97,6 +99,7 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
 {
 	m_refs.clear();
    FactorCollection& fc = FactorCollection::Instance();
+    cerr << "Number of reference files: " << refs.size() << endl; 
    for (size_t file_id = 0; file_id < refs.size(); file_id++) {
      for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
          const string& ref = refs[file_id][ref_id];
@ -430,13 +433,19 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
  else if (m_scale_by_ref_length) {
    precision *= m_ref_length_history + m_cur_ref_length;
  }
-  else if (m_scale_by_target_length) {
-  	precision *= m_target_length_history + state->m_target_length;
+  else if (m_scale_by_target_length_linear) {
+  	// length of current hypothesis + number of words still to translate from source (rest being translated 1-to-1)
+  	float scaled_target_length = state->m_target_length + (m_cur_source_length - state->m_source_length);
+  	precision *= m_target_length_history + scaled_target_length;
+  }
+  else if (m_scale_by_target_length_trend) {
+  	// length of full target if remaining words were translated with the same fertility as so far
+  	float scaled_target_length = ((float)m_cur_source_length/state->m_source_length) * state->m_target_length;
+  	precision *= m_target_length_history + scaled_target_length;
  }
  else if (m_scale_by_avg_length) {
    precision *= (m_source_length_history + m_ref_length_history + m_cur_source_length +  + m_cur_ref_length) / 2;
  }
-
  return precision*m_scale_by_x;
 }

--- a/moses/src/BleuScoreFeature.h
+++ b/moses/src/BleuScoreFeature.h
@ -80,7 +80,8 @@ public:
    void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
    void PrintReferenceLength(const std::vector<size_t>& ref_ids);
    size_t GetReferenceLength(size_t ref_id);
-    void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength, bool scaleByTargetLength,
+    void SetBleuParameters(bool scaleByInputLength, bool scaleByRefLength, bool scaleByAvgLength,
+    		bool scaleByTargetLengthLinear, bool scaleByTargetLengthTrend,
  		  float scaleByX, float historySmoothing, size_t scheme, float relaxBP);
    void GetNgramMatchCounts(Phrase&,
                             const NGrams&,
@ -125,8 +126,11 @@ private:
    // scale BLEU score by (history of) reference length
    bool m_scale_by_ref_length;

-    // scale BLEU score by (history of) target length
-    bool m_scale_by_target_length;
+    // scale BLEU score by (history of) target length (linear future estimate)
+    bool m_scale_by_target_length_linear;
+
+    // scale BLEU score by (history of) target length (trend-based future estimate)
+        bool m_scale_by_target_length_trend;

    // scale BLEU score by (history of) the average of input and reference length
    bool m_scale_by_avg_length;
--- a/moses/src/FeatureVector.cpp
+++ b/moses/src/FeatureVector.cpp
@ -255,17 +255,10 @@ namespace Moses {
  }

  FVector& FVector::operator+= (const FVector& rhs) {
-    if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
+    if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
      resize(rhs.m_coreFeatures.size());
-    }
-    for (iterator i = begin(); i != end(); ++i) {
-      set(i->first,i->second + rhs.get(i->first));
-    }
-    for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
-      if (!hasNonDefaultValue(i->first)) {
-        set(i->first,i->second);
-      }
-    }
+    for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
+    	set(i->first, get(i->first) + i->second);
    for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
      if (i < rhs.m_coreFeatures.size()) {
        m_coreFeatures[i] += rhs.m_coreFeatures[i];
@ -275,17 +268,10 @@ namespace Moses {
  }
  
  FVector& FVector::operator-= (const FVector& rhs) {
-    if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
+    if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
      resize(rhs.m_coreFeatures.size());
-    }
-    for (iterator i = begin(); i != end(); ++i) {
-      set(i->first,i->second - rhs.get(i->first));
-    }
-    for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
-      if (!hasNonDefaultValue(i->first)) {
-        set(i->first,-(i->second));
-      }
-    }
+    for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
+    	set(i->first, get(i->first) -(i->second));
    for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
      if (i < rhs.m_coreFeatures.size()) {
        m_coreFeatures[i] -= rhs.m_coreFeatures[i];
@ -336,28 +322,6 @@ namespace Moses {
    return *this;
  }
  
-	FVector& FVector::max_equals(const FVector& rhs) {
-    if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
-      resize(rhs.m_coreFeatures.size());
-    }
-		for (iterator i = begin(); i != end(); ++i) {
-		  set(i->first, max(i->second , rhs.get(i->first) ));
-		}
-		for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i) {
-      if (!hasNonDefaultValue(i->first)) {
-			  set(i->first, i->second);
-      }
-		}
-    for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
-      if (i < rhs.m_coreFeatures.size()) {
-        m_coreFeatures[i] = max(m_coreFeatures[i], rhs.m_coreFeatures[i]);
-      } else {
-        m_coreFeatures[i] = max(m_coreFeatures[i],(float)0);
-      }
-    }
-		return *this;
-	}
-  
  FVector& FVector::operator*= (const FValue& rhs) {
    //NB Could do this with boost::bind ?
    for (iterator i = begin(); i != end(); ++i) {
@ -367,7 +331,6 @@ namespace Moses {
    return *this;
  }
  
-  
  FVector& FVector::operator/= (const FValue& rhs) {
    for (iterator i = begin(); i != end(); ++i) {
      i->second /= rhs;
@ -387,6 +350,25 @@ namespace Moses {
    return norm;
  }
  
+  FValue FVector::l2norm() const { // Euclidean norm: square root of this vector's inner product with itself
+    return sqrt(inner_product(*this));
+  }
+
+  /**
+   * L-infinity norm of this vector: the largest absolute value found among the
+   * entries visited by cbegin()/cend() and the entries of m_coreFeatures.
+   * Returns 0 when there are no entries.
+   */
+  FValue FVector::linfnorm() const {
+    FValue norm = 0;
+    for (const_iterator i = cbegin(); i != cend(); ++i) {
+      float absValue = abs(i->second);
+      if (absValue > norm)
+	norm = absValue;
+    }
+    for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
+      // compare magnitudes here as well; the raw signed value would let a large
+      // negative core entry be ignored by the maximum
+      float absValue = abs(m_coreFeatures[i]);
+      if (absValue > norm)
+	norm = absValue;
+    }
+    return norm;
+  }
+
  FValue FVector::sum() const {
    FValue sum = 0;
    for (const_iterator i = cbegin(); i != cend(); ++i) {
@ -396,10 +378,6 @@ namespace Moses {
    return sum;
  }
    
-  FValue FVector::l2norm() const {
-    return sqrt(inner_product(*this));
-  }
-  
  FValue FVector::inner_product(const FVector& rhs) const {
    CHECK(m_coreFeatures.size() == rhs.m_coreFeatures.size());
    FValue product = 0.0;
@ -437,10 +415,6 @@ namespace Moses {
    return FVector(lhs) /= rhs;
  }

-  const FVector fvmax(const FVector& lhs, const FVector& rhs) {
-    return FVector(lhs).max_equals(rhs);
-  }
-  
  FValue inner_product(const FVector& lhs, const FVector& rhs) {
    if (lhs.size() >= rhs.size()) {
      return rhs.inner_product(lhs);
--- a/moses/src/FeatureVector.h
+++ b/moses/src/FeatureVector.h
@ -177,6 +177,7 @@ namespace Moses {
    /** norms and sums */
    FValue l1norm() const;
    FValue l2norm() const;
+    FValue linfnorm() const;
    FValue sum() const;
    
    /** pretty printing */
@ -292,6 +293,10 @@ namespace Moses {
      return (m_fv->m_features[m_name] += lhs);
    }
    
+    FValue operator -=(FValue lhs) { // subtracts the argument from the m_name entry of m_fv in place and returns the updated value
+      return (m_fv->m_features[m_name] -= lhs);
+    }
+
  private:
    FValue m_tmp;
    
--- a/moses/src/FeatureVectorTest.cpp
+++ b/moses/src/FeatureVectorTest.cpp
@ -224,26 +224,6 @@ BOOST_AUTO_TEST_CASE(core_scalar)

 }

-BOOST_AUTO_TEST_CASE(core_max) 
-{
-  FVector f1(2);
-  FVector f2(2);
-  FName n1("a");
-  FName n2("b");
-  FName n3("c");
-  f1[0] = 1.1; f1[1] = -0.1; ; f1[n2] = -1.5; f1[n3] = 2.2;
-  f2[0] = 0.5; f2[1] = 0.25; f2[n1] = 1; f2[n3] = 2.4;
-
-  FVector m = fvmax(f1,f2);
-
-  BOOST_CHECK_CLOSE((FValue)m[0], 1.1 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[1], 0.25 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n1], 1 , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n2],0  , TOL);
-  BOOST_CHECK_CLOSE((FValue)m[n3],2.4  , TOL);
-
-}
-
 BOOST_AUTO_TEST_CASE(l1norm) 
 {
  FVector f1(3);
--- a/moses/src/ScoreComponentCollection.cpp
+++ b/moses/src/ScoreComponentCollection.cpp
@ -63,8 +63,8 @@ void ScoreComponentCollection::MultiplyEquals(float scalar)

 // Multiply all weights of this sparse producer by a given scalar
 void ScoreComponentCollection::MultiplyEquals(const ScoreProducer* sp, float scalar) {
-	CHECK(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
-  std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
+	assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
+  std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
  for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
    std::stringstream name;
    name << i->first;
@ -100,6 +100,10 @@ float ScoreComponentCollection::GetL2Norm() const {
  return m_scores.l2norm();
 }

+float ScoreComponentCollection::GetLInfNorm() const { // forwards to FVector::linfnorm() on the underlying m_scores vector
+  return m_scores.linfnorm();
+}
+
 void ScoreComponentCollection::Save(ostream& out) const {
  ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
  for (; iter != s_scoreIndexes.end(); ++iter ) {
--- a/moses/src/ScoreComponentCollection.h
+++ b/moses/src/ScoreComponentCollection.h
@ -150,6 +150,21 @@ public:
 	  m_scores -= rhs.m_scores;
 	}

+  /**
+   * Subtract score from one named component of a producer that has an
+   * unbounded number of components. The component key is built from the
+   * producer's description and name.
+   */
+  void MinusEquals(const ScoreProducer*sp, const std::string& name, float score)
+  {
+    assert(sp->GetNumScoreComponents() == ScoreProducer::unlimited);
+    const FName componentKey(sp->GetScoreProducerDescription(), name);
+    m_scores[componentKey] -= score;
+  }
+
+  /**
+   * Subtract score from the component addressed by an already complete
+   * feature name (no producer prefix is added here).
+   */
+  void SparseMinusEquals(const std::string& full_name, float score)
+  {
+    const FName componentKey(full_name);
+    m_scores[componentKey] -= score;
+  }
+

 	//! Add scores from a single ScoreProducer only
 	//! The length of scores must be equal to the number of score components
@ -192,6 +207,13 @@ public:
    m_scores[fname] += score;
  }

+  /**
+   * Add score to the component addressed by an already complete
+   * feature name (no producer prefix is added here).
+   */
+  void SparsePlusEquals(const std::string& full_name, float score)
+  {
+    const FName componentKey(full_name);
+    m_scores[componentKey] += score;
+  }
+
 	void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
 	{
    IndexPair indexes = GetIndexes(sp);
@ -307,6 +329,7 @@ public:
  void L1Normalise();
  float GetL1Norm() const;
  float GetL2Norm() const;
+  float GetLInfNorm() const;
  void Save(const std::string& filename) const;
  void Save(std::ostream&) const;

--- a/moses/src/ScoreProducer.h
+++ b/moses/src/ScoreProducer.h
@ -54,6 +54,8 @@ public:

  void SetSparseFeatureReporting() { m_reportSparseFeatures = true; }
  bool GetSparseFeatureReporting() const { return m_reportSparseFeatures; } 
+
+  virtual float GetSparseProducerWeight() const { return 1; } // base-class default is 1; virtual, so a derived producer may report its own weight
 };


--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@ -1442,7 +1442,7 @@ bool StaticData::LoadReferences()
    }
    string line;
    while (getline(in,line)) {
-      references.back().push_back(line);
+      references[i].push_back(line);
    }
    if (i > 0) {
      if (references[i].size() != references[i-1].size()) {
@ -1459,14 +1459,12 @@ bool StaticData::LoadReferences()

 bool StaticData::LoadDiscrimLMFeature()
 {
-	cerr << "Loading discriminative language models.. ";
-
 	// only load if specified
  const vector<string> &wordFile = m_parameter->GetParam("discrim-lmodel-file");
  if (wordFile.empty()) {
    return true;
  }
-  cerr << wordFile.size() << " models" << endl;
+  cerr << "Loading " << wordFile.size() << " discriminative language model(s).." << endl;

  // if this weight is specified, the sparse DLM weights will be scaled with an additional weight
  vector<string> dlmWeightStr = m_parameter->GetParam("weight-dlm");
@ -1495,6 +1493,11 @@ bool StaticData::LoadDiscrimLMFeature()
  		}
  	}
  	else {
+  		if (m_searchAlgorithm == ChartDecoding && !include_lower_ngrams) {
+  			UserMessage::Add("Excluding lower order DLM ngrams is currently not supported for chart decoding.");
+  			return false;
+  		}
+
  		m_targetNgramFeatures.push_back(new TargetNgramFeature(factorId, order, include_lower_ngrams));
  		if (i < dlmWeights.size())
  			m_targetNgramFeatures[i]->SetSparseProducerWeight(dlmWeights[i]);
--- a/moses/src/TargetNgramFeature.cpp
+++ b/moses/src/TargetNgramFeature.cpp
@ -3,6 +3,7 @@
 #include "TargetPhrase.h"
 #include "Hypothesis.h"
 #include "ScoreComponentCollection.h"
+#include "ChartHypothesis.h"

 namespace Moses {

@ -45,7 +46,7 @@ bool TargetNgramFeature::Load(const std::string &filePath)

  std::string line;
  m_vocab.insert(BOS_);
-  m_vocab.insert(BOS_);
+  m_vocab.insert(EOS_);
  while (getline(inFile, line)) {
    m_vocab.insert(line);
  }
@ -54,10 +55,9 @@ bool TargetNgramFeature::Load(const std::string &filePath)
  return true;
 }

-
 string TargetNgramFeature::GetScoreProducerWeightShortName(unsigned) const
 {
-	return "dlmn";
+	return "dlm";
 }

 size_t TargetNgramFeature::GetNumInputScores() const
@ -65,7 +65,6 @@ size_t TargetNgramFeature::GetNumInputScores() const
 	return 0;
 }

-
 const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input*/) const
 {
 	vector<Word> bos(1,m_bos);
@ -76,8 +75,8 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
 {
-  const TargetNgramState* tnState = dynamic_cast<const TargetNgramState*>(prev_state);
-  CHECK(tnState);
+  const TargetNgramState* tnState = static_cast<const TargetNgramState*>(prev_state);
+  assert(tnState);

  // current hypothesis target phrase
  const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
@ -85,7 +84,7 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,

  // extract all ngrams from current hypothesis
  vector<Word> prev_words = tnState->GetWords();
-  string curr_ngram;
+  stringstream curr_ngram;
  bool skip = false;

  // include lower order ngrams?
@ -94,7 +93,9 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,

  for (size_t n = m_n; n >= smallest_n; --n) { // iterate over ngram size
  	for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
-  		const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
+//  		const string& curr_w = targetPhrase.GetWord(i).GetFactor(m_factorType)->GetString();
+  		const string& curr_w = targetPhrase.GetWord(i).GetString(m_factorType);
+
  		if (m_vocab.size() && (m_vocab.find(curr_w) == m_vocab.end())) continue; // skip ngrams

  		if (n > 1) {
@ -129,23 +130,23 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
      }

  		if (!skip) {
-  			curr_ngram.append(curr_w);
-  			accumulator->PlusEquals(this,curr_ngram,1);
+  			curr_ngram << curr_w;
+  			accumulator->PlusEquals(this,curr_ngram.str(),1);
      }
-  		curr_ngram.clear();
+  		curr_ngram.str("");
  	}
  }

  if (cur_hypo.GetWordsBitmap().IsComplete()) {
  	for (size_t n = m_n; n >= smallest_n; --n) {
-  		string last_ngram;
+  		stringstream last_ngram;
  		skip = false;
  		for (size_t i = cur_hypo.GetSize() - n + 1; i <  cur_hypo.GetSize() && !skip; ++i)
  			appendNgram(cur_hypo.GetWord(i), skip, last_ngram);

  		if (n > 1 && !skip) {
-  			last_ngram.append(EOS_);
-  			accumulator->PlusEquals(this,last_ngram,1);
+  			last_ngram << EOS_;
+  			accumulator->PlusEquals(this, last_ngram.str(), 1);
    	}
  	}
  	return NULL;
@ -169,13 +170,267 @@ FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
  return new TargetNgramState(new_prev_words);
 }

-void TargetNgramFeature::appendNgram(const Word& word, bool& skip, string& ngram) const {
-	const string& w = word.GetFactor(m_factorType)->GetString();
+void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream &ngram) const {
+//	const string& w = word.GetFactor(m_factorType)->GetString();
+	const string& w = word.GetString(m_factorType);
 	if (m_vocab.size() && (m_vocab.find(w) == m_vocab.end())) skip = true;
 	else {
-		ngram.append(w);
-		ngram.append(":");
+		ngram << w;
+		ngram << ":";
 	}
 }
+
+/**
+ * Chart-decoding evaluation of the target n-gram feature for the rule applied in cur_hypo.
+ *
+ * Walks the target side of the rule once. Every terminal other than <s> and </s> adds a
+ * unigram feature (m_baseName followed by the word string) to the accumulator. For
+ * non-terminals (only when m_n > 1) boundary words are taken from the
+ * TargetNgramChartState of the underlying hypothesis and collected in contextFactor;
+ * higher-order n-grams over the collected words are then added through
+ * MakePrefixNgrams / MakeSuffixNgrams, with SparseMinusEquals used to take back
+ * n-grams that were added twice.
+ *
+ * \param cur_hypo    hypothesis whose current target phrase (rule) is scored
+ * \param featureId   index used to fetch this feature's state from sub-hypotheses
+ * \param accumulator score collection that receives the sparse feature updates
+ * \return a newly allocated TargetNgramChartState for cur_hypo
+ */
+FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
+{
+  vector<const Word*> contextFactor;
+  contextFactor.reserve(m_n);
+
+  // get index map for underlying hypotheses
+  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+    cur_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+
+  // loop over rule
+  bool makePrefix = false;
+  bool makeSuffix = false;
+  bool collectForPrefix = true;
+  size_t prefixTerminals = 0;
+  size_t suffixTerminals = 0;
+  bool onlyTerminals = true;
+  bool prev_is_NT = false;
+  size_t prev_subPhraseLength = 0;
+  for (size_t phrasePos = 0; phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize(); phrasePos++)
+  {
+    // consult rule for either word or non-terminal
+    const Word &word = cur_hypo.GetCurrTargetPhrase().GetWord(phrasePos);
+//    cerr << "word: " << word << endl;
+
+    // regular word
+    if (!word.IsNonTerminal()) {
+    	contextFactor.push_back(&word);
+    	// NOTE(review): prev_is_NT is cleared here, before the makeSuffix test below reads it,
+    	// so the "|| prev_is_NT" clause can never be true. Confirm whether the reset was
+    	// meant to come after that test; behaviour is left unchanged.
+    	prev_is_NT = false;
+
+      if (phrasePos==0)
+      	makePrefix = true;
+      if (phrasePos==cur_hypo.GetCurrTargetPhrase().GetSize()-1 || prev_is_NT)
+      	makeSuffix = true;
+      
+      // beginning/end of sentence symbol <s>,</s>?
+      string factorZero = word.GetString(0);
+      if (factorZero.compare("<s>") == 0)
+      	prefixTerminals++;
+      // end of sentence symbol </s>?
+      else if (factorZero.compare("</s>") == 0)
+      	suffixTerminals++;
+      // everything else
+      else {
+      	// unigram feature for this terminal
+      	stringstream ngram;
+      	ngram << m_baseName;
+      	if (m_factorType == 0)
+      		ngram << factorZero;
+      	else
+      		ngram << word.GetString(m_factorType);
+      	accumulator->SparsePlusEquals(ngram.str(), 1);
+
+      	if (collectForPrefix)
+      		prefixTerminals++;
+      	else
+      		suffixTerminals++;
+      }
+    }
+
+    // non-terminal, add phrase from underlying hypothesis
+    else if (m_n > 1)
+    {
+      // look up underlying hypothesis
+      size_t nonTermIndex = nonTermIndexMap[phrasePos];
+      const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermIndex);
+
+      const TargetNgramChartState* prevState =
+      		static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId));
+      size_t subPhraseLength = prevState->GetNumTargetTerminals();
+
+      // special case: rule starts with non-terminal
+      if (phrasePos == 0) {
+      	if (subPhraseLength == 1) {
+      		makePrefix = true;
+      		++prefixTerminals;
+
+      		const Word &word = prevState->GetSuffix().GetWord(0);
+//      		cerr << "NT0 --> : " << word << endl;
+      		contextFactor.push_back(&word);
+      	}
+      	else {
+      		onlyTerminals = false;
+      		collectForPrefix = false;
+      		int suffixPos = prevState->GetSuffix().GetSize() - (m_n-1);
+      		if (suffixPos < 0) suffixPos = 0; // push all words if less than order
+      		for(;(size_t)suffixPos < prevState->GetSuffix().GetSize(); suffixPos++)
+      		{
+      			const Word &word = prevState->GetSuffix().GetWord(suffixPos);
+//      			cerr << "NT0 --> : " << word << endl;
+      			contextFactor.push_back(&word);
+      		}
+      	}
+      }
+
+      // internal non-terminal
+      else
+      {
+      	// push its prefix
+      	for(size_t prefixPos = 0; prefixPos < m_n-1
+              && prefixPos < subPhraseLength; prefixPos++)
+        {
+          const Word &word = prevState->GetPrefix().GetWord(prefixPos);
+//          cerr << "NT --> " << word << endl;
+          contextFactor.push_back(&word);
+        }
+
+      	if (subPhraseLength==1) {
+      		if (collectForPrefix)
+      			++prefixTerminals;
+      		else
+      			++suffixTerminals;
+
+      		if (phrasePos == cur_hypo.GetCurrTargetPhrase().GetSize()-1)
+      			makeSuffix = true;
+      	}
+      	else {
+      		onlyTerminals = false;
+      		collectForPrefix = true;
+
+      		// check if something follows this NT
+      		bool wordFollowing = (phrasePos < cur_hypo.GetCurrTargetPhrase().GetSize() - 1)? true : false;
+
+      		// check if we are dealing with a large sub-phrase
+      		if (wordFollowing && subPhraseLength > m_n - 1)
+      		{
+      			// clear up pending ngrams
+      			MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
+      			contextFactor.clear();
+      			makePrefix = false;
+      			makeSuffix = true;
+      			collectForPrefix = false;
+      			prefixTerminals = 0;
+      			suffixTerminals = 0;
+
+      			// push its suffix: every word stored in the sub-hypothesis' suffix phrase
+      			for(size_t suffixPos = 0; suffixPos < prevState->GetSuffix().GetSize(); suffixPos++) {
+      				const Word &word = prevState->GetSuffix().GetWord(suffixPos);
+//      				cerr << "NT --> : " << word << endl;
+      				contextFactor.push_back(&word);
+      			}
+      		}
+      		// subphrase can be used as suffix and as prefix for the next part
+      		else if (wordFollowing && subPhraseLength == m_n - 1)
+      		{
+      			// clear up pending ngrams
+      			MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
+      			makePrefix = false;
+      			makeSuffix = true;
+      			collectForPrefix = false;
+      			prefixTerminals = 0;
+      			suffixTerminals = 0;
+      		}
+      		else if (prev_is_NT && prev_subPhraseLength > 1 && subPhraseLength > 1) {
+      			// two NTs in a row: make transition
+      			MakePrefixNgrams(contextFactor, accumulator, 1, m_n-2);
+      			MakeSuffixNgrams(contextFactor, accumulator, 1, m_n-2);
+      			makePrefix = false;
+      			makeSuffix = false;
+      			collectForPrefix = false;
+      			prefixTerminals = 0;
+      			suffixTerminals = 0;
+		
+      			// remove duplicates
+      			stringstream curr_ngram;
+      			curr_ngram << m_baseName;
+      			curr_ngram << (*contextFactor[m_n-2]).GetString(m_factorType);
+      			curr_ngram << ":";
+      			curr_ngram << (*contextFactor[m_n-1]).GetString(m_factorType);
+      			accumulator->SparseMinusEquals(curr_ngram.str(),1);
+      		}
+      	}
+      }
+      prev_is_NT = true;
+      prev_subPhraseLength = subPhraseLength;
+    }
+  }
+
+  if (m_n > 1) {
+    if (onlyTerminals) {
+    	// prefixTerminals is unsigned: with 0 the subtraction below would wrap to SIZE_MAX
+    	// and MakePrefixNgrams would iterate over that many start positions
+    	if (prefixTerminals > 0)
+    		MakePrefixNgrams(contextFactor, accumulator, prefixTerminals-1);
+    }
+    else {
+      if (makePrefix)
+      	MakePrefixNgrams(contextFactor, accumulator, prefixTerminals);
+      if (makeSuffix)
+      	MakeSuffixNgrams(contextFactor, accumulator, suffixTerminals);
+
+      // remove duplicates
+      size_t size = contextFactor.size();
+      if (makePrefix && makeSuffix && (size <= m_n)) {
+      	stringstream curr_ngram;
+      	curr_ngram << m_baseName;
+      	for (size_t i = 0; i < size; ++i) {
+      		curr_ngram << (*contextFactor[i]).GetString(m_factorType);
+      		if (i < size-1)
+      			curr_ngram << ":";
+      	}
+      	accumulator->SparseMinusEquals(curr_ngram.str(), 1);
+      }
+    }
+  }
+
+//  cerr << endl;
+  return new TargetNgramChartState(cur_hypo, featureId, m_n);
+}
+
+/**
+ * Adds n-gram features that start near the front of contextFactor.
+ *
+ * For each of the numberOfStartPos start positions k+offset (k = 0, 1, ...), every n-gram of
+ * length 2 up to m_n that begins there and fits inside contextFactor is added to the
+ * accumulator with value 1. The feature name is m_baseName followed by the words joined
+ * with ":". <s> and </s> are always written from factor 0; other words use m_factorType.
+ *
+ * \param contextFactor    words to build n-grams from
+ * \param accumulator      score collection that receives the features
+ * \param numberOfStartPos number of consecutive start positions to use
+ * \param offset           index of the first start position
+ */
+void TargetNgramFeature::MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfStartPos, size_t offset) const {
+	stringstream ngram;
+	size_t size = contextFactor.size();
+  for (size_t k = 0; k < numberOfStartPos; ++k) {
+    // exclusive upper bound for the last word: at most m_n words, never past the end
+    size_t max_end = (size < m_n+k+offset)? size: m_n+k+offset;
+    for (size_t end_pos = 1+k+offset; end_pos < max_end; ++end_pos) {
+      ngram << m_baseName;
+    	for (size_t i=k+offset; i <= end_pos; ++i) {
+      	if (i > k+offset)
+      		ngram << ":";
+        string factorZero = (*contextFactor[i]).GetString(0);
+        if (m_factorType == 0 || factorZero.compare("<s>") == 0 || factorZero.compare("</s>") == 0)
+      		ngram << factorZero;
+      	else
+      		ngram << (*contextFactor[i]).GetString(m_factorType);
+      }
+//      cerr << "p-ngram: " << ngram.str() << endl;
+      accumulator->SparsePlusEquals(ngram.str(), 1);
+      ngram.str("");
+    }
+  }
+}
+
+/**
+ * Adds n-gram features that end near the back of contextFactor.
+ *
+ * For each of the numberOfEndPos end positions (counted backwards from the last word,
+ * shifted by offset), every n-gram of length 2 up to m_n that ends there is added to the
+ * accumulator with value 1. The feature name is m_baseName followed by the words joined
+ * with ":". <s> and </s> are always written from factor 0; other words use m_factorType.
+ *
+ * \param contextFactor  words to build n-grams from
+ * \param accumulator    score collection that receives the features
+ * \param numberOfEndPos number of consecutive end positions to use
+ * \param offset         number of trailing words to skip before the first end position
+ */
+void TargetNgramFeature::MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator, size_t numberOfEndPos, size_t offset) const {
+  stringstream featureName;
+  for (size_t endIdx = 0; endIdx < numberOfEndPos; ++endIdx) {
+    const size_t last = contextFactor.size()-1-endIdx-offset;
+
+    // grow the n-gram to the left, one word at a time, until it would exceed the order
+    int first = last-1;
+    while ((first >= 0) && (last-first < m_n)) {
+      featureName << m_baseName;
+      for (size_t pos = first; pos <= last; ++pos) {
+        const Word &token = *contextFactor[pos];
+        string surface = token.GetString(0);
+        const bool useFactorZero =
+            (m_factorType == 0 || surface.compare("<s>") == 0 || surface.compare("</s>") == 0);
+        if (useFactorZero) {
+          featureName << surface;
+        } else {
+          featureName << token.GetString(m_factorType);
+        }
+        if (pos != last) {
+          featureName << ":";
+        }
+      }
+      accumulator->SparsePlusEquals(featureName.str(), 1);
+      featureName.str("");
+      --first;
+    }
+  }
+}
+
 }

--- a/moses/src/TargetNgramFeature.h
+++ b/moses/src/TargetNgramFeature.h
@ -9,6 +9,10 @@
 #include "FFState.h"
 #include "Word.h"

+#include "LM/SingleFactor.h"
+#include "ChartHypothesis.h"
+#include "ChartManager.h"
+
 namespace Moses
 {

@ -22,43 +26,190 @@ class TargetNgramState : public FFState {
    std::vector<Word> m_words;
 };

+/** Feature state for chart decoding: stores the first and last (order - 1)
+ * target words of a hypothesis together with the source span it covers.
+ * Compare() orders two states by these prefix/suffix contexts.
+ */
+class TargetNgramChartState : public FFState
+{
+private:
+  Phrase m_contextPrefix, m_contextSuffix;
+
+  size_t m_numTargetTerminals; // This isn't really correct except for the surviving hypothesis
+
+  size_t m_startPos, m_endPos, m_inputSize;
+
+  /** Construct the prefix string of up to specified size
+   * \param hypo hypothesis whose target side is walked left to right
+   * \param featureId index used to fetch this feature's state from previous hypotheses
+   * \param ret prefix string
+   * \param size maximum size (typically max lm context window)
+   * \return number of words that could still be added (0 once the prefix is full)
+   */
+  size_t CalcPrefix(const ChartHypothesis &hypo, const int featureId, Phrase &ret, size_t size) const
+  {
+    // nothing to collect; also keeps the unsigned countdown below from wrapping
+    if (size == 0)
+      return 0;
+
+    const TargetPhrase &target = hypo.GetCurrTargetPhrase();
+    const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+      target.GetAlignmentInfo().GetNonTermIndexMap();
+
+    // loop over the rule that is being applied
+    for (size_t pos = 0; pos < target.GetSize(); ++pos) {
+      const Word &word = target.GetWord(pos);
+
+      // for non-terminals, retrieve it from underlying hypothesis
+      if (word.IsNonTerminal()) {
+        size_t nonTermInd = nonTermIndexMap[pos];
+        const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
+        const TargetNgramChartState *prevState =
+          static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId));
+        size = prevState->CalcPrefix(*prevHypo, featureId, ret, size);
+      }
+      // for words, add word
+      else {
+        ret.AddWord(word);
+        size--;
+      }
+
+      // finish when maximum length reached
+      if (size==0)
+        break;
+    }
+
+    return size;
+  }
+
+  /** Construct the suffix phrase of up to specified size
+   * will always be called after the construction of prefix phrase
+   * \param hypo hypothesis whose target side is walked right to left
+   * \param featureId index used to fetch this feature's state from previous hypotheses
+   * \param ret suffix phrase
+   * \param size maximum size of suffix
+   * \return number of words that could still be added (0 once the suffix is full)
+   */
+  size_t CalcSuffix(const ChartHypothesis &hypo, int featureId, Phrase &ret, size_t size) const
+  {
+    const size_t prefixSize = m_contextPrefix.GetSize();
+    assert(prefixSize <= m_numTargetTerminals);
+
+    // nothing to collect; also keeps the unsigned countdown below from wrapping
+    if (size == 0)
+      return 0;
+
+    // special handling for small hypotheses
+    // does the prefix match the entire hypothesis string? -> just copy prefix
+    if (prefixSize == m_numTargetTerminals) {
+      const size_t maxCount = std::min(prefixSize, size);
+
+      // copy the last maxCount prefix words, back to front
+      for (size_t ind = 0; ind < maxCount; ++ind) {
+        const Word &word = m_contextPrefix.GetWord(prefixSize - 1 - ind);
+        ret.PrependWord(word);
+      }
+
+      return size - maxCount;
+    }
+
+    // construct suffix analogous to prefix
+    // bind by reference: copying the whole target phrase on every call is wasted work
+    const TargetPhrase &targetPhrase = hypo.GetCurrTargetPhrase();
+    const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+      targetPhrase.GetAlignmentInfo().GetNonTermIndexMap();
+    for (int pos = (int) targetPhrase.GetSize() - 1; pos >= 0 ; --pos) {
+      const Word &word = targetPhrase.GetWord(pos);
+
+      if (word.IsNonTerminal()) {
+        size_t nonTermInd = nonTermIndexMap[pos];
+        const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermInd);
+        const TargetNgramChartState *prevState =
+          static_cast<const TargetNgramChartState*>(prevHypo->GetFFState(featureId));
+        size = prevState->CalcSuffix(*prevHypo, featureId, ret, size);
+      }
+      else {
+        ret.PrependWord(word);
+        size--;
+      }
+
+      if (size==0)
+        break;
+    }
+
+    return size;
+  }
+
+public:
+  /** Build the state for a chart hypothesis.
+   * \param hypo hypothesis being scored
+   * \param featureId index used to fetch this feature's state from previous hypotheses
+   * \param order n-gram order; prefix and suffix hold at most order - 1 words
+   */
+  TargetNgramChartState(const ChartHypothesis &hypo, int featureId, size_t order)
+      :m_contextPrefix(order - 1),
+      m_contextSuffix(order - 1)
+  {
+    m_numTargetTerminals = hypo.GetCurrTargetPhrase().GetNumTerminals();
+    const WordsRange &range = hypo.GetCurrSourceRange();
+    m_startPos = range.GetStartPos();
+    m_endPos = range.GetEndPos();
+    m_inputSize = hypo.GetManager().GetSource().GetSize();
+
+    const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
+    for (std::vector<const ChartHypothesis*>::const_iterator i = prevHypos.begin(); i != prevHypos.end(); ++i) {
+      // keep count of words (= length of generated string)
+      m_numTargetTerminals += static_cast<const TargetNgramChartState*>((*i)->GetFFState(featureId))->GetNumTargetTerminals();
+    }
+
+    // the suffix computation reads m_contextPrefix, so the prefix must be built first
+    CalcPrefix(hypo, featureId, m_contextPrefix, order - 1);
+    CalcSuffix(hypo, featureId, m_contextSuffix, order - 1);
+  }
+
+  //! number of target terminals counted for this hypothesis and its predecessors
+  size_t GetNumTargetTerminals() const {
+    return m_numTargetTerminals;
+  }
+
+  //! leading context words collected by CalcPrefix
+  const Phrase &GetPrefix() const {
+    return m_contextPrefix;
+  }
+  //! trailing context words collected by CalcSuffix
+  const Phrase &GetSuffix() const {
+    return m_contextSuffix;
+  }
+
+  /** Compare with another TargetNgramChartState.
+   * The prefix is only compared when this span does not start at position 0,
+   * the suffix only when it does not end at the last input position.
+   * \return first non-zero result of the phrase comparisons, 0 if none differ
+   */
+  int Compare(const FFState& o) const {
+    const TargetNgramChartState &other =
+      static_cast<const TargetNgramChartState &>( o );
+
+    // prefix
+    if (m_startPos > 0) // not for "<s> ..."
+    {
+      int ret = GetPrefix().Compare(other.GetPrefix());
+      if (ret != 0)
+        return ret;
+    }
+
+    if (m_endPos < m_inputSize - 1)// not for "... </s>"
+    {
+      int ret = GetSuffix().Compare(other.GetSuffix());
+      if (ret != 0)
+        return ret;
+    }
+    return 0;
+  }
+};
+
 /** Sets the features of observed ngrams.
 */
 class TargetNgramFeature : public StatefulFeatureFunction {
 public:
 	TargetNgramFeature(FactorType factorType = 0, size_t n = 3, bool lower_ngrams = true):
-     StatefulFeatureFunction("dlmn", ScoreProducer::unlimited),
+     StatefulFeatureFunction("dlm", ScoreProducer::unlimited),
     m_factorType(factorType),
     m_n(n),
     m_lower_ngrams(lower_ngrams),
     m_sparseProducerWeight(1)
  {
    FactorCollection& factorCollection = FactorCollection::Instance();
-    const Factor* bosFactor =
-       factorCollection.AddFactor(Output,m_factorType,BOS_);
+    const Factor* bosFactor = factorCollection.AddFactor(Output,m_factorType,BOS_);
    m_bos.SetFactor(m_factorType,bosFactor);
+    m_baseName = GetScoreProducerDescription();
+    m_baseName.append("_");
  }

-
 	bool Load(const std::string &filePath);

 	std::string GetScoreProducerWeightShortName(unsigned) const;
 	size_t GetNumInputScores() const;

  void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
-  float GetSparseProducerWeight() { return m_sparseProducerWeight; }
+  float GetSparseProducerWeight() const { return m_sparseProducerWeight; }

 	virtual const FFState* EmptyHypothesisState(const InputType &input) const;

 	virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
 	                          ScoreComponentCollection* accumulator) const;

-  virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
-                                  int /* featureID */,
-                                  ScoreComponentCollection* ) const
-                                  {
-                                    abort();
-                                  }
+  virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
+                                  ScoreComponentCollection* accumulator) const;
+
 private:
  FactorType m_factorType;
  Word m_bos;
@ -69,7 +220,13 @@ private:
 	// additional weight that all sparse weights are scaled with
 	float m_sparseProducerWeight;

-	void appendNgram(const Word& word, bool& skip, std::string& ngram) const;
+	std::string m_baseName;
+
+	void appendNgram(const Word& word, bool& skip, std::stringstream& ngram) const;
+	void MakePrefixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
+			      size_t numberOfStartPos = 1, size_t offset = 0) const;
+	void MakeSuffixNgrams(std::vector<const Word*> &contextFactor, ScoreComponentCollection* accumulator,
+			      size_t numberOfEndPos = 1, size_t offset = 0) const;
 };

 }
--- a/moses/src/Word.cpp
+++ b/moses/src/Word.cpp
@ -85,6 +85,15 @@ std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlan
  return strme.str();
 }

+/** Return the string of the factor stored at index factorType.
+ * \param factorType index into the word's factor array
+ * \return the factor's string, or an empty string if no factor is set there
+ */
+std::string Word::GetString(FactorType factorType) const
+{
+  const Factor *factor = m_factorArray[factorType];
+  // a std::string must not be constructed from a null pointer, so an unset
+  // factor yields an empty string instead of "return NULL"
+  if (factor == NULL)
+    return std::string();
+  return factor->GetString();
+}
+
 void Word::CreateFromString(FactorDirection direction
                            , const std::vector<FactorType> &factorOrder
                            , const std::string &str
@ -94,7 +103,8 @@ void Word::CreateFromString(FactorDirection direction

  vector<string> wordVec;
  Tokenize(wordVec, str, "|");
-  CHECK(wordVec.size() == factorOrder.size());
+  if (!isNonTerminal)
+  	assert(wordVec.size() == factorOrder.size());

  const Factor *factor;
  for (size_t ind = 0; ind < wordVec.size(); ++ind) {
--- a/moses/src/Word.h
+++ b/moses/src/Word.h
@ -101,6 +101,7 @@ public:
  * these debugging functions.
  */
  std::string GetString(const std::vector<FactorType> factorType,bool endWithBlank) const;
+  std::string GetString(FactorType factorType) const;
  TO_STRING();

  //! transitive comparison of Word objects