Remove caching of WP weight and translation weights; clean up Mira code

Author: Eva Hasler 2012-04-29 05:37:48 +01:00
parent f09c962f76
commit ef552fe91a
22 changed files with 335 additions and 319 deletions

View File

@ -369,12 +369,10 @@ namespace Mira {
void MosesDecoder::setBleuParameters(bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, float relax_BP,
bool useSourceLengthHistory) {
float scaleByX, float historySmoothing, size_t scheme) {
m_bleuScoreFeature->SetBleuParameters(sentenceBleu, scaleByInputLength, scaleByAvgInputLength,
scaleByInverseLength, scaleByAvgInverseLength,
scaleByX, historySmoothing, scheme, relax_BP,
useSourceLengthHistory);
scaleByX, historySmoothing, scheme);
}
}

View File

@ -110,8 +110,7 @@ class MosesDecoder {
size_t getShortestReferenceIndex(size_t ref_id);
void setBleuParameters(bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, float relax_BP,
bool useSourceLengthHistory);
float scaleByX, float historySmoothing, size_t scheme);
void setAvgInputLength (float l) { m_bleuScoreFeature->SetAvgInputLength(l); }
Moses::ScoreComponentCollection getWeights();
void setWeights(const Moses::ScoreComponentCollection& weights);

View File

@ -41,6 +41,7 @@ namespace mpi = boost::mpi;
#include "Optimiser.h"
#include "Hildreth.h"
#include "ThreadPool.h"
#include "DummyScoreProducers.h"
using namespace Mira;
using namespace std;
@ -70,21 +71,17 @@ int main(int argc, char** argv) {
size_t mixingFrequency;
size_t weightDumpFrequency;
string weightDumpStem;
float min_learning_rate;
bool scale_margin, scale_margin_precision;
bool scale_update, scale_update_precision;
size_t n;
size_t batchSize;
bool distinctNbest;
bool onlyViolatedConstraints;
bool accumulateWeights;
float historySmoothing;
bool scaleByInputLength, scaleByAvgInputLength;
bool scaleByInverseLength, scaleByAvgInverseLength;
float scaleByX;
float slack, dummy;
float slack_step;
float slack_min;
float slack;
bool averageWeights;
bool weightConvergence;
float learning_rate;
@ -94,75 +91,67 @@ int main(int argc, char** argv) {
size_t baseOfLog;
string decoder_settings;
float min_weight_change;
float decrease_learning_rate;
bool normaliseWeights, normaliseMargin;
bool print_feature_values;
bool historyOf1best;
bool historyOfOracles;
bool historyBleu ;
bool sentenceLevelBleu;
float bleuWeight, bleuWeight_hope, bleuWeight_fear;
float margin_slack;
float margin_slack_incr;
bool perceptron_update;
bool hope_fear, hope_fear_rank, hope_model;
bool model_hope_fear, rank_only;
int hope_n, fear_n, rank_n;
int threadcount;
size_t adapt_after_epoch;
size_t bleu_smoothing_scheme;
float max_length_dev_all;
float max_length_dev_hypos;
float max_length_dev_hope_ref;
float max_length_dev_fear_ref;
float relax_BP;
float min_oracle_bleu;
float minBleuRatio, maxBleuRatio;
bool boost;
bool decode_hope, decode_fear, decode_model;
string decode_filename;
size_t update_scheme;
bool separateUpdates, batchEqualsShard;
bool batchEqualsShard;
bool sparseAverage, dumpMixedWeights, sparseNoAverage;
bool useSourceLengthHistory;
int featureCutoff;
bool pruneZeroWeights;
bool megam;
bool printFeatureCounts, printNbestWithFeatures;
bool avgRefLength;
bool print_weights;
bool print_weights, print_core_weights, clear_static, debug_model, scale_lm, bleu_weight_lm, bleu_weight_lm_adjust, scale_wp;
float scale_lm_factor, bleu_weight_lm_factor, scale_wp_factor;
po::options_description desc("Allowed options");
desc.add_options()
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
("dummy", po::value<float>(&dummy)->default_value(-1), "Dummy variable for slack")
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
("adapt-after-epoch", po::value<size_t>(&adapt_after_epoch)->default_value(0), "Index of epoch after which adaptive parameters will be adapted")
("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
("avg-ref-length", po::value<bool>(&avgRefLength)->default_value(false), "Use average reference length instead of shortest for BLEU score feature")
("base-of-log", po::value<size_t>(&baseOfLog)->default_value(10), "Base for taking logs of feature values")
("batch-equals-shard", po::value<bool>(&batchEqualsShard)->default_value(false), "Batch size is equal to shard size (purely batch)")
("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments")
("bleu-score-weight", po::value<float>(&bleuWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the Bleu objective weight)")
("bleu-score-weight-hope", po::value<float>(&bleuWeight_hope)->default_value(-1), "Bleu score weight used in the decoder objective function for hope translations")
("bleu-score-weight-fear", po::value<float>(&bleuWeight_fear)->default_value(-1), "Bleu score weight used in the decoder objective function for fear translations")
("bleu-weight", po::value<float>(&bleuWeight)->default_value(1.0), "Bleu score weight used in the decoder objective function (on top of the Bleu objective weight)")
("bleu-weight-hope", po::value<float>(&bleuWeight_hope)->default_value(-1), "Bleu score weight used in the decoder objective function for hope translations")
("bleu-weight-fear", po::value<float>(&bleuWeight_fear)->default_value(-1), "Bleu score weight used in the decoder objective function for fear translations")
("bleu-weight-lm", po::value<bool>(&bleu_weight_lm)->default_value(false), "Make bleu weight depend on lm weight")
("bleu-weight-factor-lm", po::value<float>(&bleu_weight_lm_factor)->default_value(2.0), "Make bleu weight depend on lm weight by this factor")
("bleu-weight-adjust-lm", po::value<bool>(&bleu_weight_lm_adjust)->default_value(false), "Adjust bleu weight when lm weight changes")
("bleu-smoothing-scheme", po::value<size_t>(&bleu_smoothing_scheme)->default_value(1), "Set a smoothing scheme for sentence-Bleu: +1 (1), +0.1 (2), papineni (3) (default:1)")
("boost", po::value<bool>(&boost)->default_value(false), "Apply boosting factor to updates on misranked candidates")
("clear-static", po::value<bool>(&clear_static)->default_value(false), "Clear static data before every translation")
("config,f", po::value<string>(&mosesConfigFile), "Moses ini-file")
("configs-folds", po::value<vector<string> >(&mosesConfigFilesFolds), "Moses ini-files, one for each fold")
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
("debug-model", po::value<bool>(&debug_model)->default_value(false), "Get best model translation for debugging purposes")
("decode-hope", po::value<bool>(&decode_hope)->default_value(false), "Decode dev input set according to hope objective")
("decode-fear", po::value<bool>(&decode_fear)->default_value(false), "Decode dev input set according to fear objective")
("decode-model", po::value<bool>(&decode_model)->default_value(false), "Decode dev input set according to normal objective")
("decode-filename", po::value<string>(&decode_filename), "Filename for Bleu objective translations")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
("dump-mixed-weights", po::value<bool>(&dumpMixedWeights)->default_value(false), "Dump mixed weights instead of averaged weights")
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
("feature-cutoff", po::value<int>(&featureCutoff)->default_value(-1), "Feature cutoff as additional regularization for sparse features")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("history-of-1best", po::value<bool>(&historyOf1best)->default_value(false), "Use 1best translations to update the history")
("history-of-oracles", po::value<bool>(&historyOfOracles)->default_value(false), "Use oracle translations to update the history")
("history-of-1best", po::value<bool>(&historyBleu)->default_value(false), "Use 1best translations to update the history")
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("hope-fear", po::value<bool>(&hope_fear)->default_value(true), "Use only hope and fear translations for optimisation (not model)")
("hope-fear-rank", po::value<bool>(&hope_fear_rank)->default_value(false), "Use hope and fear translations for optimisation, use model for ranking")
@ -172,56 +161,51 @@ int main(int argc, char** argv) {
("input-files-folds", po::value<vector<string> >(&inputFilesFolds), "Input files containing tokenised source, one for each fold")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
("log-feature-values", po::value<bool>(&logFeatureValues)->default_value(false), "Take log of feature values according to the given base.")
("margin-incr", po::value<float>(&margin_slack_incr)->default_value(0), "Increment margin slack after every epoch by this amount")
("margin-slack", po::value<float>(&margin_slack)->default_value(0), "Slack when comparing left and right hand side of constraints")
("max-length-dev-all", po::value<float>(&max_length_dev_all)->default_value(-1), "Make use of all 3 following options")
("max-length-dev-hypos", po::value<float>(&max_length_dev_hypos)->default_value(-1), "Number between 0 and 1 specifying the percentage of admissible length deviation between hope and fear translations")
("max-length-dev-hope-ref", po::value<float>(&max_length_dev_hope_ref)->default_value(-1), "Number between 0 and 1 specifying the percentage of admissible length deviation between hope and reference translations")
("max-length-dev-fear-ref", po::value<float>(&max_length_dev_fear_ref)->default_value(-1), "Number between 0 and 1 specifying the percentage of admissible length deviation between fear and reference translations")
("min-bleu-ratio", po::value<float>(&minBleuRatio)->default_value(-1), "Set a minimum BLEU ratio between hope and fear")
("max-bleu-ratio", po::value<float>(&maxBleuRatio)->default_value(-1), "Set a maximum BLEU ratio between hope and fear")
("megam", po::value<bool>(&megam)->default_value(false), "Use megam for optimization step")
("min-learning-rate", po::value<float>(&min_learning_rate)->default_value(0), "Set a minimum learning rate")
("megam", po::value<bool>(&megam)->default_value(false), "Use megam for optimization step")
("min-oracle-bleu", po::value<float>(&min_oracle_bleu)->default_value(0), "Set a minimum oracle BLEU score")
("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.01), "Set minimum weight change for stopping criterion")
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation")
("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in n-best list")
("normalise-weights", po::value<bool>(&normaliseWeights)->default_value(true), "Whether to normalise the updated weights before passing them to the decoder")
("normalise-weights", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("normalise-margin", po::value<bool>(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
("print-feature-counts", po::value<bool>(&printFeatureCounts)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch")
("print-nbest-with-features", po::value<bool>(&printNbestWithFeatures)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch")
("print-weights", po::value<bool>(&print_weights)->default_value(false), "Print out new weights")
("print-weights", po::value<bool>(&print_weights)->default_value(false), "Print out current weights")
("print-core-weights", po::value<bool>(&print_core_weights)->default_value(false), "Print out current core weights")
("prune-zero-weights", po::value<bool>(&pruneZeroWeights)->default_value(false), "Prune zero-valued sparse feature weights")
("rank-n", po::value<int>(&rank_n)->default_value(-1), "Number of translations used for ranking")
("rank-only", po::value<bool>(&rank_only)->default_value(false), "Use only model translations for optimisation")
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("reference-files-folds", po::value<vector<string> >(&referenceFilesFolds), "Reference translation files for training, one for each fold")
("relax-BP", po::value<float>(&relax_BP)->default_value(1), "Relax the BP by setting this value between 0 and 1")
("reference-files-folds", po::value<vector<string> >(&referenceFilesFolds), "Reference translation files for training, one for each fold")
("scale-by-inverse-length", po::value<bool>(&scaleByInverseLength)->default_value(false), "Scale the BLEU score by (a history of) the inverse input length")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale the BLEU score by (a history of) the input length")
("scale-by-avg-input-length", po::value<bool>(&scaleByAvgInputLength)->default_value(false), "Scale BLEU by an average of the input length")
("scale-by-avg-inverse-length", po::value<bool>(&scaleByAvgInverseLength)->default_value(false), "Scale BLEU by an average of the inverse input length")
("scale-by-x", po::value<float>(&scaleByX)->default_value(1), "Scale the BLEU score by value x")
("scale-margin", po::value<bool>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
("scale-lm", po::value<bool>(&scale_lm)->default_value(false), "Scale the language model feature")
("scale-factor-lm", po::value<float>(&scale_lm_factor)->default_value(2), "Scale the language model feature by this factor")
("scale-wp", po::value<bool>(&scale_wp)->default_value(false), "Scale the word penalty feature")
("scale-factor-wp", po::value<float>(&scale_wp_factor)->default_value(2), "Scale the word penalty feature by this factor")
("scale-margin", po::value<bool>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
("scale-margin-precision", po::value<bool>(&scale_margin_precision)->default_value(0), "Scale the margin by the precision of the oracle translation")
("scale-update", po::value<bool>(&scale_update)->default_value(0), "Scale the update by the Bleu score of the oracle translation")
("scale-update-precision", po::value<bool>(&scale_update_precision)->default_value(0), "Scale the update by the precision of the oracle translation")
("sentence-level-bleu", po::value<bool>(&sentenceLevelBleu)->default_value(true), "Use a sentences level Bleu scoring function")
("separate-updates", po::value<bool>(&separateUpdates)->default_value(false), "Compute separate updates for each sentence in a batch")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
("slack-min", po::value<float>(&slack_min)->default_value(0.01), "Minimum slack used")
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
("sparse-average", po::value<bool>(&sparseAverage)->default_value(false), "Average weights by the number of processes")
("sparse-no-average", po::value<bool>(&sparseNoAverage)->default_value(false), "Don't average sparse weights, just sum")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
("update-scheme", po::value<size_t>(&update_scheme)->default_value(1), "Update scheme, default: 1")
("use-source-length-history", po::value<bool>(&useSourceLengthHistory)->default_value(false), "Use history of source length instead of target length for history Bleu")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
@ -264,9 +248,6 @@ int main(int argc, char** argv) {
cerr << "Training with " << mosesConfigFilesFolds.size() << " folds" << endl;
trainWithMultipleFolds = true;
}
if (dummy != -1)
slack = dummy;
if (dumpMixedWeights && (mixingFrequency != weightDumpFrequency)) {
cerr << "Set mixing frequency = weight dump frequency for dumping mixed weights!" << endl;
@ -380,12 +361,10 @@ int main(int argc, char** argv) {
if (scaleByAvgInputLength || scaleByInverseLength || scaleByAvgInverseLength)
scaleByInputLength = false;
if (historyOf1best || historyOfOracles)
if (historyBleu)
sentenceLevelBleu = false;
if (!sentenceLevelBleu) {
if (!historyOf1best && !historyOfOracles) {
historyOf1best = true;
}
historyBleu = true;
}
// initialise Moses
@ -411,17 +390,10 @@ int main(int argc, char** argv) {
MosesDecoder* decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params);
decoder->setBleuParameters(sentenceLevelBleu, scaleByInputLength, scaleByAvgInputLength,
scaleByInverseLength, scaleByAvgInverseLength,
scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP, useSourceLengthHistory);
scaleByX, historySmoothing, bleu_smoothing_scheme);
SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
bool chartDecoding = (searchAlgorithm == ChartDecoding);
cerr << "Normalise weights? " << normaliseWeights << endl;
if (normaliseWeights) {
ScoreComponentCollection startWeights = decoder->getWeights();
startWeights.L1Normalise();
decoder->setWeights(startWeights);
}
if (decode_hope || decode_fear || decode_model) {
size_t decode = 1;
if (decode_fear) decode = 2;
@ -463,8 +435,8 @@ int main(int argc, char** argv) {
cerr << "Optimising using Mira" << endl;
cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl;
}
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_margin_precision,
scale_update, scale_update_precision, margin_slack, boost, update_scheme, normaliseMargin);
optimiser = new MiraOptimiser(slack, scale_margin, scale_margin_precision,
scale_update, scale_update_precision, boost, normaliseMargin);
learning_rate = mira_learning_rate;
perceptron_update = false;
} else if (learner == "perceptron") {
@ -507,13 +479,6 @@ int main(int argc, char** argv) {
return 1;
}
if (bleuWeight_hope == -1) {
bleuWeight_hope = bleuWeight;
}
if (bleuWeight_fear == -1) {
bleuWeight_fear = bleuWeight;
}
if (max_length_dev_all != -1) {
max_length_dev_hypos = max_length_dev_all;
max_length_dev_hope_ref = max_length_dev_all;
@ -575,16 +540,37 @@ int main(int argc, char** argv) {
// set core weights
ScoreComponentCollection initialWeights = decoder->getWeights();
cerr << "Rank " << rank << ", initial weights: " << initialWeights << endl;
if (coreWeightMap.size() > 0) {
ProducerWeightMap::iterator p;
for(p = coreWeightMap.begin(); p!=coreWeightMap.end(); ++p)
initialWeights.Assign(p->first, p->second);
}
cerr << "Normalise weights? " << normaliseWeights << endl;
if (normaliseWeights)
initialWeights.L1Normalise();
decoder->setWeights(initialWeights);
//Main loop:
// print initial weights
cerr << "Rank " << rank << ", initial weights: " << initialWeights << endl;
// set bleu weight to twice the size of the language model weight(s)
const LMList& lmList = staticData.GetLMList();
if (bleu_weight_lm) {
float lmSum = 0;
for (LMList::const_iterator i = lmList.begin(); i != lmList.end(); ++i)
lmSum += abs(initialWeights.GetScoreForProducer(*i));
bleuWeight = lmSum * bleu_weight_lm_factor;
if (bleuWeight_hope == -1) {
bleuWeight_hope = bleuWeight;
}
if (bleuWeight_fear == -1) {
bleuWeight_fear = bleuWeight;
}
}
cerr << "Bleu weight: " << bleuWeight << endl;
//Main loop:
cerr << "Rank " << rank << ", start weights: " << initialWeights << endl;
ScoreComponentCollection cumulativeWeights; // collect weights per epoch to produce an average
ScoreComponentCollection cumulativeWeightsBinary;
size_t numberOfUpdates = 0;
@ -707,7 +693,7 @@ int main(int argc, char** argv) {
bleuScoresFear.push_back(newScores);
modelScoresHope.push_back(newScores);
modelScoresFear.push_back(newScores);
if (historyOf1best) {
if (historyBleu || debug_model) {
dummyFeatureValues.push_back(newFeatureValues);
dummyBleuScores.push_back(newScores);
dummyModelScores.push_back(newScores);
@ -761,12 +747,30 @@ int main(int argc, char** argv) {
}
if (hope_fear || hope_fear_rank || perceptron_update) {
if (print_weights)
cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: " << mosesWeights << endl;
if (print_core_weights) {
cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: ";
mosesWeights.PrintCoreFeatures();
cerr << endl;
}
// check LM weight
for (LMList::const_iterator i = lmList.begin(); i != lmList.end(); ++i) {
float lmWeight = mosesWeights.GetScoreForProducer(*i);
cerr << "Rank " << rank << ", epoch " << epoch << ", lm weight: " << lmWeight << endl;
if (lmWeight <= 0)
cerr << "ERROR: language model weight should never be <= 0." << endl;
}
// HOPE
/*delete decoder;
StaticData::ClearDataStatic();
decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params);
decoder->setBleuParameters(sentenceLevelBleu, scaleByInputLength, scaleByAvgInputLength, scaleByInverseLength, scaleByAvgInverseLength, scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP, useSourceLengthHistory);
decoder->setWeights(mosesWeights);*/
if (clear_static) {
delete decoder;
StaticData::ClearDataStatic();
decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params);
decoder->setBleuParameters(sentenceLevelBleu, scaleByInputLength, scaleByAvgInputLength, scaleByInverseLength, scaleByAvgInverseLength, scaleByX, historySmoothing, bleu_smoothing_scheme);
decoder->setWeights(mosesWeights);
}
//cerr << "Rank " << rank << ", epoch " << epoch << ", using weights: " << decoder->getWeights() << endl;
@ -795,9 +799,8 @@ int main(int argc, char** argv) {
float precision = bleuScoresHope[batchPosition][0];
if (historyOf1best) {
if (useSourceLengthHistory) precision /= decoder->getSourceLengthHistory();
else precision /= decoder->getTargetLengthHistory();
if (historyBleu) {
precision /= decoder->getTargetLengthHistory();
}
else {
if (scaleByAvgInputLength) precision /= decoder->getAverageInputLength();
@ -805,13 +808,12 @@ int main(int argc, char** argv) {
precision /= scaleByX;
}
if (scale_margin_precision || scale_update_precision) {
if (historyOf1best || scaleByAvgInputLength || scaleByAvgInverseLength) {
if (historyBleu || scaleByAvgInputLength || scaleByAvgInverseLength) {
cerr << "Rank " << rank << ", epoch " << epoch << ", set hope precision: " << precision << endl;
((MiraOptimiser*) optimiser)->setPrecision(precision);
}
}
// exit(0);
bool skip = false;
// Length-related example selection
@ -820,9 +822,17 @@ int main(int argc, char** argv) {
skip = true;
vector<const Word*> bestModel;
if (historyOf1best && !skip) {
if (debug_model || (historyBleu && !skip)) {
// MODEL (for updating the history only, using dummy vectors)
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (for history or length stabilisation)" << endl;
if (clear_static) {
delete decoder;
StaticData::ClearDataStatic();
decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params);
decoder->setBleuParameters(sentenceLevelBleu, scaleByInputLength, scaleByAvgInputLength, scaleByInverseLength, scaleByAvgInverseLength, scaleByX, historySmoothing, bleu_smoothing_scheme);
decoder->setWeights(mosesWeights);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score " << endl;
vector< vector<const Word*> > outputModel = decoder->getNBest(input, *sid, 1, 0.0, bleuWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], dummyModelScores[batchPosition],
1, distinctNbest, avgRefLength, rank, epoch);
@ -837,11 +847,13 @@ int main(int argc, char** argv) {
float bleuRatioHopeFear = 0;
int fearSize = 0;
if (!skip) {
/*delete decoder;
StaticData::ClearDataStatic();
decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params);
decoder->setBleuParameters(sentenceLevelBleu, scaleByInputLength, scaleByAvgInputLength, scaleByInverseLength, scaleByAvgInverseLength, scaleByX, historySmoothing, bleu_smoothing_scheme, relax_BP, useSourceLengthHistory);
decoder->setWeights(mosesWeights);*/
if (clear_static) {
delete decoder;
StaticData::ClearDataStatic();
decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params);
decoder->setBleuParameters(sentenceLevelBleu, scaleByInputLength, scaleByAvgInputLength, scaleByInverseLength, scaleByAvgInverseLength, scaleByX, historySmoothing, bleu_smoothing_scheme);
decoder->setWeights(mosesWeights);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", " << fear_n << "best fear translations" << endl;
vector< vector<const Word*> > outputFear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuWeight_fear,
@ -888,7 +900,7 @@ int main(int argc, char** argv) {
// sanity check
float epsilon = 0.0001;
if (historyOf1best) {
if (historyBleu) {
if (dummyBleuScores[batchPosition][0] > bleuScoresHope[batchPosition][0] &&
dummyModelScores[batchPosition][0] > modelScoresHope[batchPosition][0]) {
if (abs(dummyBleuScores[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon &&
@ -908,8 +920,14 @@ int main(int argc, char** argv) {
}
if (bleuScoresFear[batchPosition][0] > bleuScoresHope[batchPosition][0]) {
if (abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon) {
cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << " > " << epsilon << ")" <<endl;
// check if it's an error or a warning
skip = true;
if (modelScoresFear[batchPosition][0] > modelScoresHope[batchPosition][0] && abs(modelScoresFear[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) {
cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << ")" <<endl;
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", WARNING: FEAR translation has better Bleu than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << ")" <<endl;
}
}
}
@ -919,7 +937,7 @@ int main(int argc, char** argv) {
featureValuesFear[batchPosition].clear();
bleuScoresHope[batchPosition].clear();
bleuScoresFear[batchPosition].clear();
if (historyOf1best) {
if (historyBleu) {
dummyFeatureValues[batchPosition].clear();
dummyBleuScores[batchPosition].clear();
}
@ -1143,6 +1161,35 @@ int main(int argc, char** argv) {
break;
}
}
// scale LM feature (to avoid rapid changes)
if (scale_lm) {
for (LMList::const_iterator iter = lmList.begin(); iter != lmList.end(); ++iter) {
// scale up weight
float lmWeight = mosesWeights.GetScoreForProducer(*iter);
mosesWeights.Assign(*iter, lmWeight*scale_lm_factor);
cerr << "Rank " << rank << ", epoch " << epoch << ", lm weight scaled from " << lmWeight << " to " << lmWeight*scale_lm_factor << endl;
// scale down score
scaleFeatureScore(*iter, scale_lm_factor, featureValuesHope, rank, epoch);
scaleFeatureScore(*iter, scale_lm_factor, featureValuesFear, rank, epoch);
scaleFeatureScore(*iter, scale_lm_factor, featureValues, rank, epoch);
}
}
// scale WP
if (scale_wp) {
// scale up weight
WordPenaltyProducer *wp = staticData.GetWordPenaltyProducer();
float wpWeight = mosesWeights.GetScoreForProducer(wp);
mosesWeights.Assign(wp, wpWeight*scale_wp_factor);
cerr << "Rank " << rank << ", epoch " << epoch << ", wp weight scaled from " << wpWeight << " to " << wpWeight*scale_wp_factor << endl;
// scale down score
scaleFeatureScore(wp, scale_wp_factor, featureValuesHope, rank, epoch);
scaleFeatureScore(wp, scale_wp_factor, featureValuesFear, rank, epoch);
scaleFeatureScore(wp, scale_wp_factor, featureValues, rank, epoch);
}
// take logs of feature values
if (logFeatureValues) {
@ -1189,25 +1236,7 @@ int main(int argc, char** argv) {
featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], bleuScoresFear[0][0],
modelScoresHope[0][0], modelScoresFear[0][0], learning_rate, rank, epoch);
else {
if (batchSize > 1 && separateUpdates) {
// separate updates for all input sentences
ScoreComponentCollection tmpWeights(mosesWeights);
for (size_t i = 0; i < batchSize; ++i) {
// use only the specified batch position to compute the update
int updatePosition = i;
ScoreComponentCollection partialWeightUpdate;
size_t partial_update_status = optimiser->updateWeightsHopeFear(tmpWeights, partialWeightUpdate,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear,
modelScoresHope, modelScoresFear, learning_rate, rank, epoch, updatePosition);
if (partial_update_status == 0) {
update_status = 0;
weightUpdate.PlusEquals(partialWeightUpdate);
tmpWeights.PlusEquals(partialWeightUpdate);
}
}
}
else
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear,
modelScoresHope, modelScoresFear, learning_rate, rank, epoch);
}
@ -1233,6 +1262,25 @@ int main(int argc, char** argv) {
// sumStillViolatedConstraints += update_status;
// rescale LM feature
if (scale_lm) {
for (LMList::const_iterator iter = lmList.begin(); iter != lmList.end(); ++iter) {
// scale weight back down
float lmWeight = mosesWeights.GetScoreForProducer(*iter);
mosesWeights.Assign(*iter, lmWeight/scale_lm_factor);
cerr << "Rank " << rank << ", epoch " << epoch << ", lm weight rescaled from " << lmWeight << " to " << lmWeight/scale_lm_factor << endl;
}
}
// rescale WP feature
if (scale_wp) {
// scale weight back down
WordPenaltyProducer *wp = staticData.GetWordPenaltyProducer();
float wpWeight = mosesWeights.GetScoreForProducer(wp);
mosesWeights.Assign(wp, wpWeight/scale_wp_factor);
cerr << "Rank " << rank << ", epoch " << epoch << ", wp weight rescaled from " << wpWeight << " to " << wpWeight/scale_wp_factor << endl;
}
if (update_status == 0) { // if weights were updated
// apply weight update
cerr << "Rank " << rank << ", epoch " << epoch << ", applying update.." << endl;
@ -1263,23 +1311,31 @@ int main(int argc, char** argv) {
// set new Moses weights
decoder->setWeights(mosesWeights);
if (print_weights)
cerr << "Rank " << rank << ", epoch " << epoch << ", new weights: " << mosesWeights << endl;
// adjust bleu weight
if (bleu_weight_lm_adjust) {
float lmSum = 0;
for (LMList::const_iterator i = lmList.begin(); i != lmList.end(); ++i)
lmSum += abs(mosesWeights.GetScoreForProducer(*i));
bleuWeight = lmSum * bleu_weight_lm_factor;
cerr << "Rank " << rank << ", epoch " << epoch << ", adjusting Bleu weight to " << bleuWeight << " (factor " << bleu_weight_lm_factor << ")" << endl;
if (bleuWeight_hope == -1) {
bleuWeight_hope = bleuWeight;
}
if (bleuWeight_fear == -1) {
bleuWeight_fear = bleuWeight;
}
}
}
// update history (for approximate document Bleu)
if (historyOf1best) {
if (historyBleu) {
for (size_t i = 0; i < oneBests.size(); ++i) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update history with 1best length: " << oneBests[i].size() << " ";
}
decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch);
}
else if (historyOfOracles) {
for (size_t i = 0; i < oracles.size(); ++i) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update history with oracle length: " << oracles[i].size() << " ";
}
decoder->updateHistory(oracles, inputLengths, ref_ids, rank, epoch);
}
deleteTranslations(oracles);
deleteTranslations(oneBests);
} // END TRANSLATE AND UPDATE BATCH
@ -1462,7 +1518,7 @@ int main(int argc, char** argv) {
fearPlusFeatures.close();
}
if (verbosity > 0) {
if (historyBleu) {
cerr << "Bleu feature history after epoch " << epoch << endl;
decoder->printBleuFeatureHistory(cerr);
}
@ -1537,42 +1593,6 @@ int main(int argc, char** argv) {
mpi::broadcast(world, stop, 0);
#endif
} //end if (weightConvergence)
// adjust flexible parameters
if (!stop && epoch >= adapt_after_epoch) {
// if using flexible slack, decrease slack parameter for next epoch
if (slack_step > 0) {
if (slack - slack_step >= slack_min) {
if (typeid(*optimiser) == typeid(MiraOptimiser)) {
slack -= slack_step;
VERBOSE(1, "Change slack to: " << slack << endl);
((MiraOptimiser*) optimiser)->setSlack(slack);
}
}
}
// if using flexible margin slack, decrease margin slack parameter for next epoch
if (margin_slack_incr > 0.0001) {
if (typeid(*optimiser) == typeid(MiraOptimiser)) {
margin_slack += margin_slack_incr;
VERBOSE(1, "Change margin slack to: " << margin_slack << endl);
((MiraOptimiser*) optimiser)->setMarginSlack(margin_slack);
}
}
// change learning rate
if ((decrease_learning_rate > 0) && (learning_rate - decrease_learning_rate >= min_learning_rate)) {
learning_rate -= decrease_learning_rate;
if (learning_rate <= 0.0001) {
learning_rate = 0;
stop = true;
#ifdef MPI_ENABLE
mpi::broadcast(world, stop, 0);
#endif
}
VERBOSE(1, "Change learning rate to " << learning_rate << endl);
}
}
}
} // end of epoch loop
@ -1806,3 +1826,17 @@ void decodeHopeOrFear(size_t rank, size_t size, size_t decode, string filename,
delete decoder;
exit(0);
}
void scaleFeatureScore(ScoreProducer *sp, float scaling_factor, vector<vector<ScoreComponentCollection> > &featureValues, size_t rank, size_t epoch) {
string name = sp->GetScoreProducerWeightShortName();
// scale down score
float featureScore;
for (size_t i=0; i<featureValues.size(); ++i) { // each item in batch
for (size_t j=0; j<featureValues[i].size(); ++j) { // each item in nbest
featureScore = featureValues[i][j].GetScoreForProducer(sp);
featureValues[i][j].Assign(sp, featureScore/scaling_factor);
cerr << "Rank " << rank << ", epoch " << epoch << ", " << name << " score scaled from " << featureScore << " to " << featureScore/scaling_factor << endl;
}
}
}
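For context on the scale-lm/scale-wp logic above: multiplying a weight by a factor while dividing the corresponding feature score by the same factor leaves the decoder's model score (the weight-feature inner product) unchanged, so the rescaling only shifts magnitude between weight and score around the update. A minimal standalone sketch of that invariant, using plain vectors rather than the Moses ScoreComponentCollection API:

#include <cmath>
#include <cstdio>
#include <vector>

// model score = dot(weights, featureScores)
static float Dot(const std::vector<float>& w, const std::vector<float>& f) {
  float s = 0;
  for (size_t i = 0; i < w.size(); ++i) s += w[i] * f[i];
  return s;
}

int main() {
  // index 0 plays the role of the LM (or WP) feature; values are made up
  std::vector<float> weights(3);  weights[0] = 0.5f;  weights[1] = -0.2f; weights[2] = 0.1f;
  std::vector<float> scores(3);   scores[0] = -12.0f; scores[1] = 4.0f;   scores[2] = -3.0f;

  float before = Dot(weights, scores);
  const float factor = 2.0f;   // analogue of scale_lm_factor / scale_wp_factor
  weights[0] *= factor;        // scale the weight up ...
  scores[0]  /= factor;        // ... and the matching feature score down
  float after = Dot(weights, scores);

  // the decoder's model score is unchanged
  std::printf("before=%f after=%f diff=%g\n", before, after, std::fabs(before - after));
  return 0;
}

The same cancellation is what scaleFeatureScore relies on when it divides the hope/fear/model feature scores by the factor the corresponding weight was multiplied with.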

View File

@ -51,5 +51,6 @@ void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection>
void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);
void decodeHopeOrFear(size_t rank, size_t size, size_t decode, std::string decode_filename, std::vector<std::string> &inputSentences, Mira::MosesDecoder* decoder, size_t n);
void scaleFeatureScore(Moses::ScoreProducer *sp, float scaling_factor, std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t rank, size_t epoch);
#endif /* MAIN_H_ */

View File

@ -28,7 +28,6 @@ size_t MiraOptimiser::updateWeights(
// most violated constraint in batch
ScoreComponentCollection max_batch_featureValueDiff;
float max_batch_lossMinusModelScoreDiff = -1;
// Make constraints for new hypothesis translations
float epsilon = 0.0001;
@ -56,19 +55,16 @@ size_t MiraOptimiser::updateWeights(
// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float modelScoreDiff = oracleModelScores[i] - modelScores[i][j];
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
if (loss > modelScoreDiff) {
diff = loss - modelScoreDiff;
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (diff > epsilon) {
violated = true;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_margin_slack);
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
if (m_normaliseMargin)
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
@ -146,7 +142,7 @@ size_t MiraOptimiser::updateWeights(
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
float diff = loss - modelScoreDiff;
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
@ -180,7 +176,6 @@ size_t MiraOptimiser::updateWeightsHopeFear(
// most violated constraint in batch
ScoreComponentCollection max_batch_featureValueDiff;
float max_batch_lossMinusModelScoreDiff = -1;
// Make constraints for new hypothesis translations
float epsilon = 0.0001;
@ -215,19 +210,16 @@ size_t MiraOptimiser::updateWeightsHopeFear(
// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][k];
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
if (loss > modelScoreDiff) {
diff = loss - modelScoreDiff;
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (diff > epsilon) {
violated = true;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_margin_slack);
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
if (m_normaliseMargin)
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
@ -318,7 +310,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
float diff = loss - modelScoreDiff;
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
@ -349,86 +341,29 @@ size_t MiraOptimiser::updateWeightsAnalytically(
// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
// scenario 1: reward only-hope, penalize only-fear
// scenario 2: reward all-hope, penalize only-fear
// scenario 3: reward all-hope
// scenario 4: reward strongly only-hope, reward mildly all-hope
// scenario 5: reward strongly only-hope, reward mildly all-hope, penalize only-fear
// scenario 6: reward only-hope
// scenario 7: penalize only-fear
ScoreComponentCollection featureValueDiff;
switch (m_update_scheme) {
case 2:
// values: 1: all-hope, -1: only-fear
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
featureValueDiff.SparsePlusEquals(featureValuesHope);
//max: 1 (set all 2 to 1)
featureValueDiff.CapMax(1);
break;
case 3:
// values: 1: all-hope
featureValueDiff = featureValuesHope;
break;
case 4:
// values: 2: only-hope, 1: both
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
featureValueDiff.SparsePlusEquals(featureValuesHope);
// min: 0 (set all -1 to 0)
featureValueDiff.CapMin(0);
break;
case 5:
// values: 2: only-hope, 1: both, -1: only-fear
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
featureValueDiff.SparsePlusEquals(featureValuesHope);
break;
case 6:
// values: 1: only-hope
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
// min: 0 (set all -1 to 0)
featureValueDiff.CapMin(0);
break;
case 7:
// values: -1: only-fear
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
// max: 0 (set all 1 to 0)
featureValueDiff.CapMax(0);
break;
case 1:
default:
// values: 1: only-hope, -1: only-fear
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
break;
ScoreComponentCollection featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
if (featureValueDiff.GetL1Norm() == 0) {
cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
return 1;
}
if (featureValueDiff.GetL1Norm() == 0) {
cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
return 1;
}
// cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float modelScoreDiff = modelScoreHope - modelScoreFear;
float loss = bleuScoreHope - bleuScoreFear;
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
if (loss > modelScoreDiff) {
diff = loss - modelScoreDiff;
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (m_normaliseMargin) {
modelScoreDiff = (2/(1 + exp(-modelScoreDiff))) - 1;
loss = (2/(1 + exp(-loss))) - 1;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
if (loss > modelScoreDiff) {
diff = loss - modelScoreDiff;
}
cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
}
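// Note: 2/(1+exp(-x)) - 1 equals tanh(x/2), so the loss and the model-score
// difference are both squashed into (-1, 1) before the violation is re-checked.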
if (m_scale_margin) {
@ -512,7 +447,7 @@ size_t MiraOptimiser::updateWeightsAnalytically(
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
modelScoreDiff = featureValueDiff.InnerProduct(newWeights);
diff = loss - (modelScoreDiff + m_margin_slack);
diff = loss - modelScoreDiff;
// approximate comparison between floats!
if (diff > epsilon) {
constraintViolatedAfter = true;
@ -581,9 +516,6 @@ size_t MiraOptimiser::updateWeightsRankModel(
if (diff > epsilon) {
violated = true;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
@ -663,7 +595,7 @@ size_t MiraOptimiser::updateWeightsRankModel(
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
float diff = loss - modelScoreDiff;
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
@ -731,9 +663,6 @@ size_t MiraOptimiser::updateWeightsHopeFearAndRankModel(
if (diff > epsilon) {
violated = true;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
@ -787,9 +716,6 @@ size_t MiraOptimiser::updateWeightsHopeFearAndRankModel(
if (diff > epsilon) {
violated = true;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
@ -869,7 +795,7 @@ size_t MiraOptimiser::updateWeightsHopeFearAndRankModel(
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights);
float loss = all_losses[i];
float diff = loss - (modelScoreDiff + m_margin_slack);
float diff = loss - modelScoreDiff;
if (diff > epsilon) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
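With the margin slack and the only-violated-constraints option removed, each constraint above reduces to the plain MIRA form: the hope-fear model-score difference should be at least the BLEU loss, and the recorded violation is max(0, loss - modelScoreDiff). For the single-constraint (analytical) case this leads to the familiar clipped update; a rough sketch under that reading, not the exact Moses implementation:

#include <algorithm>
#include <vector>

// Sketch of the analytical MIRA step for one hope/fear constraint:
//   violation = max(0, loss - modelScoreDiff)
//   step      = min(slack, violation / ||delta||^2),  delta = featuresHope - featuresFear
//   weights  += step * delta
std::vector<float> MiraStep(std::vector<float> weights,
                            const std::vector<float>& delta,
                            float loss, float modelScoreDiff, float slack) {
  float violation = std::max(0.0f, loss - modelScoreDiff);
  float sqNorm = 0.0f;
  for (size_t i = 0; i < delta.size(); ++i) sqNorm += delta[i] * delta[i];
  if (violation <= 0.0f || sqNorm <= 0.0f) return weights;  // satisfied, or no direction
  float step = std::min(slack, violation / sqNorm);         // slack caps the step size
  for (size_t i = 0; i < delta.size(); ++i) weights[i] += step * delta[i];
  return weights;
}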

View File

@ -67,20 +67,16 @@ namespace Mira {
MiraOptimiser() :
Optimiser() { }
MiraOptimiser(bool onlyViolatedConstraints, float slack, bool scale_margin, bool scale_margin_precision,
bool scale_update, bool scale_update_precision, float margin_slack, bool boost,
size_t update_scheme, bool normaliseMargin) :
MiraOptimiser(float slack, bool scale_margin, bool scale_margin_precision,
bool scale_update, bool scale_update_precision, bool boost, bool normaliseMargin) :
Optimiser(),
m_onlyViolatedConstraints(onlyViolatedConstraints),
m_slack(slack),
m_scale_margin(scale_margin),
m_scale_margin_precision(scale_margin_precision),
m_scale_update(scale_update),
m_scale_update_precision(scale_update_precision),
m_precision(1),
m_margin_slack(margin_slack),
m_boost(boost),
m_update_scheme(update_scheme),
m_normaliseMargin(normaliseMargin) { }
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
@ -145,10 +141,6 @@ namespace Mira {
void setSlack(float slack) {
m_slack = slack;
}
void setMarginSlack(float margin_slack) {
m_margin_slack = margin_slack;
}
void setPrecision(float precision) {
m_precision = precision;
@ -156,15 +148,9 @@ namespace Mira {
private:
// add only violated constraints to the optimisation problem
bool m_onlyViolatedConstraints;
// regularise Hildreth updates
float m_slack;
// slack when comparing losses to model scores
float m_margin_slack;
// scale margin with BLEU score or precision
bool m_scale_margin, m_scale_margin_precision;
@ -176,9 +162,6 @@ namespace Mira {
// boosting of updates on misranked candidates
bool m_boost;
// select 1 of 5 different update schemes
size_t m_update_scheme;
// squash margin between 0 and 1
bool m_normaliseMargin;
};

View File

@ -92,8 +92,7 @@ void BleuScoreFeature::PrintHistory(std::ostream& out) const {
void BleuScoreFeature::SetBleuParameters(bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP,
bool useSourceLengthHistory) {
float scaleByX, float historySmoothing, size_t scheme) {
m_sentence_bleu = sentenceBleu;
m_scale_by_input_length = scaleByInputLength;
m_scale_by_avg_input_length = scaleByAvgInputLength;
@ -102,8 +101,6 @@ void BleuScoreFeature::SetBleuParameters(bool sentenceBleu, bool scaleByInputLen
m_scale_by_x = scaleByX;
m_historySmoothing = historySmoothing;
m_smoothing_scheme = (SmoothingScheme)scheme;
m_relax_BP = relaxBP;
m_useSourceLengthHistory = useSourceLengthHistory;
}
// Incoming references (refs) are stored as refs[file_id][[sent_id][reference]]
@ -633,7 +630,7 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
smoothed_count += 1;
}
break;
case LIGHT:
case PLUS_POINT_ONE:
if (i > 0) {
// smoothing for all n > 1
smoothed_matches += 0.1;
@ -662,9 +659,9 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
// where
// c: length of the candidate translation
// r: effective reference length (sum of best match lengths for each candidate sentence)
if (state->m_target_length < (state->m_scaled_ref_length * m_relax_BP)) {
if (state->m_target_length < state->m_scaled_ref_length) {
float smoothed_target_length = m_target_length_history + state->m_target_length;
float smoothed_ref_length = m_ref_length_history + (state->m_scaled_ref_length * m_relax_BP);
float smoothed_ref_length = m_ref_length_history + state->m_scaled_ref_length;
precision *= exp(1 - (smoothed_ref_length/ smoothed_target_length));
}
@ -696,8 +693,8 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
// = BP * 4th root(PRODUCT_1_4 p_n)
for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
if (state->m_ngram_counts[i]) {
smoothed_matches = m_match_history[i] + state->m_ngram_matches[i];
smoothed_count = m_count_history[i] + state->m_ngram_counts[i];
smoothed_matches = m_match_history[i] + state->m_ngram_matches[i] + 0.1;
smoothed_count = m_count_history[i] + state->m_ngram_counts[i] + 0.1;
precision *= smoothed_matches / smoothed_count;
}
}
@ -705,18 +702,18 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
// take geometric mean
precision = pow(precision, (float)1/4);
// BP
// Apply brevity penalty if applicable.
if (m_target_length_history + state->m_target_length < m_ref_length_history + state->m_scaled_ref_length)
precision *= exp(1 - (m_ref_length_history + state->m_scaled_ref_length/m_target_length_history + state->m_target_length));
// cerr << "precision: " << precision << endl;
//cerr << "\nprecision: " << precision << endl;
// **BLEU score of pseudo-document**
float precision_pd = 1.0;
if (m_target_length_history > 0) {
for (size_t i = 0; i < BleuScoreState::bleu_order; i++)
if (m_count_history[i] != 0)
precision_pd *= m_match_history[i]/m_count_history[i];
precision_pd *= (m_match_history[i] + 0.1)/(m_count_history[i] + 0.1);
// take geometric mean
precision_pd = pow(precision_pd, (float)1/4);
@ -729,18 +726,16 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
precision_pd = 0;
// **end BLEU of pseudo-document**
// cerr << "precision pd: " << precision_pd << endl;
//cerr << "precision pd: " << precision_pd << endl;
float sentence_impact;
if (m_target_length_history > 0) {
if (m_source_length_history)
sentence_impact = m_source_length_history * (precision - precision_pd);
else
sentence_impact = m_target_length_history * (precision - precision_pd);
}
if (m_target_length_history > 0)
sentence_impact = m_target_length_history * (precision - precision_pd);
else
sentence_impact = precision;
sentence_impact = precision;
sentence_impact *= 10;
//cerr << "sentence impact: " << sentence_impact << endl;
return sentence_impact;
}
}
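A compact summary of the quantity computed above, as a reading of the code: with M_n, C_n the n-gram matches/counts accumulated in the history, m_n, c_n those of the current sentence, and c, r the history-augmented candidate and (scaled) reference lengths,

\[
p_n^{(+s)} = \frac{M_n + m_n + 0.1}{C_n + c_n + 0.1}, \qquad
p_n^{(hist)} = \frac{M_n + 0.1}{C_n + 0.1}, \qquad
\mathrm{BLEU} = \mathrm{BP}\cdot\Bigl(\prod_{n=1}^{4} p_n\Bigr)^{1/4}, \qquad
\mathrm{BP} = \min\bigl(1,\; e^{\,1 - r/c}\bigr),
\]
\[
\mathrm{sentence\_impact} = 10 \cdot c_{hist} \cdot \bigl(\mathrm{BLEU}^{(+s)} - \mathrm{BLEU}^{(hist)}\bigr),
\]

i.e. the feature value is ten times the history-weighted change in pseudo-document BLEU caused by adding the current sentence (or simply ten times the sentence's smoothed BLEU while the history is still empty).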

View File

@ -66,7 +66,7 @@ public:
m_scale_by_x(1),
m_historySmoothing(0.9),
m_useSourceLengthHistory(0),
m_smoothing_scheme(PLUS_ONE),
m_smoothing_scheme(PLUS_POINT_ONE),
m_relax_BP(1) {}
std::string GetScoreProducerDescription() const
@ -93,8 +93,7 @@ public:
void PrintRefLength(const std::vector<size_t>& ref_ids);
void SetBleuParameters(bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP,
bool useSourceLengthHistory);
float scaleByX, float historySmoothing, size_t scheme);
void GetNgramMatchCounts(Phrase&,
const NGrams&,
@ -163,7 +162,7 @@ private:
float m_historySmoothing;
bool m_useSourceLengthHistory;
enum SmoothingScheme { PLUS_ONE = 1, LIGHT = 2, PAPINENI = 3 };
enum SmoothingScheme { PLUS_ONE = 1, PLUS_POINT_ONE = 2, PAPINENI = 3 };
SmoothingScheme m_smoothing_scheme;
// relax application of the BP by setting a value between 0 and 1

View File

@ -242,7 +242,6 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
std::vector<float> weightT = staticData.GetTranslationSystem(TranslationSystem::DEFAULT).GetTranslationWeights();
cerr << "Read weightT from translation sytem.. " << std::endl;
targetPhraseCollection
= tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
,m_outputFactorsVec

View File

@ -323,6 +323,17 @@ namespace Moses {
m_coreFeatures[i] = logOfValue;
}
}
void FVector::printCoreFeatures() {
cerr << "core=(";
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
cerr << m_coreFeatures[i];
if (i + 1 < m_coreFeatures.size()) {
cerr << ",";
}
}
cerr << ") ";
}
FVector& FVector::operator+= (const FVector& rhs) {
if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
@ -519,9 +530,9 @@ namespace Moses {
FValue FVector::l1norm_coreFeatures() const {
FValue norm = 0;
for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
// ignore Bleu score feature (last feature)
for (size_t i = 0; i < m_coreFeatures.size()-1; ++i)
norm += abs(m_coreFeatures[i]);
}
return norm;
}

View File

@ -197,6 +197,7 @@ namespace Moses {
/** additional */
void logCoreFeatures(size_t baseOfLog);
void printCoreFeatures();
//scale so that abs. value is less than maxvalue
void thresholdScale(float maxValue );

View File

@ -447,14 +447,12 @@ public:
//tally up
std::vector<float> weightT = system.GetTranslationWeights();
std::cerr << "Read weightT from translation sytem.. " << std::endl;
//float score=std::inner_product(nscores.begin(), nscores.end(), m_weights.begin(), 0.0f);
float score=std::inner_product(nscores.begin(), nscores.end(), weightT.begin(), 0.0f);
//count word penalty
float weightWP = system.GetWeightWordPenalty();
std::cerr << "Read weightWP from translation sytem: " << weightWP << std::endl;
//score-=tcands[i].tokens.size() * m_weightWP;
//score-=tcands[i].tokens.size() * m_weightWP;
score-=tcands[i].tokens.size() * weightWP;
std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].tokens,TScores()));

View File

@ -79,7 +79,6 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
{
const StaticData& staticData = StaticData::Instance();
std::vector<float> weightT = system->GetTranslationWeights();
cerr << "Read weightT from translation sytem.. " << std::endl;
if (m_implementation == Memory) {
// memory phrase table

View File

@ -57,7 +57,6 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
std::auto_ptr<RuleTableLoader> loader =
RuleTableLoaderFactory::Create(grammarFile);
std::vector<float> weightT = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT).GetTranslationWeights();
cerr << "Read weightT from translation sytem.. " << std::endl;
//bool ret = loader->Load(*m_input, *m_output, inFile, *m_weight, m_tableLimit,
bool ret = loader->Load(*m_input, *m_output, inFile, weightT, m_tableLimit,
*m_languageModels, m_wpProducer, *this);

View File

@ -95,7 +95,6 @@ ChartRuleLookupManager *PhraseDictionaryOnDisk::CreateRuleLookupManager(
const ChartCellCollection &cellCollection)
{
std::vector<float> weightT = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT).GetTranslationWeights();
cerr << "Read weightT from translation sytem.. " << std::endl;
return new ChartRuleLookupManagerOnDisk(sentence, cellCollection, *this,
m_dbWrapper, m_languageModels,
m_wpProducer, m_inputFactorsVec,

View File

@ -78,6 +78,10 @@ private:
}
public:
static void ResetCounter() {
s_denseVectorSize = 0;
}
//! Create a new score collection with all values set to 0.0
ScoreComponentCollection();
@ -314,6 +318,10 @@ public:
void LogCoreFeatures(size_t baseOfLog) {
m_scores.logCoreFeatures(baseOfLog);
}
void PrintCoreFeatures() {
m_scores.printCoreFeatures();
}
void ThresholdScaling(float maxValue)
{

View File

@ -34,6 +34,10 @@ public:
static const size_t unlimited;
static void ResetDescriptionCounts() {
description_counts.clear();
}
//! returns the number of scores that a subclass produces.
//! For example, a language model conventionally produces 1, a translation table some arbitrary number, etc
//! sparse features returned unlimited

View File

@ -145,7 +145,7 @@ void SearchCubePruning::ProcessSentence()
stackNo++;
}
PrintBitmapContainerGraph();
//PrintBitmapContainerGraph();
// some more logging
IFVERBOSE(2) {

View File

@ -116,6 +116,49 @@ StaticData::StaticData()
Phrase::InitializeMemPool();
}
void StaticData::ClearData() {
for (size_t i=0; i < m_decodeGraphs.size(); ++i)
delete m_decodeGraphs[i];
m_decodeGraphs.clear();
m_decodeGraphBackoff.clear();
m_translationSystems.clear();
for (size_t i=0; i < m_wordPenaltyProducers.size(); ++i) {
ScoreComponentCollection::UnregisterScoreProducer(m_wordPenaltyProducers[i]);
delete m_wordPenaltyProducers[i];
}
m_wordPenaltyProducers.clear();
for (size_t i=0; i < m_distortionScoreProducers.size(); ++i) {
ScoreComponentCollection::UnregisterScoreProducer(m_distortionScoreProducers[i]);
delete m_distortionScoreProducers[i];
}
m_distortionScoreProducers.clear();
for (size_t i=0; i < m_phraseDictionary.size(); ++i) {
ScoreComponentCollection::UnregisterScoreProducer(m_phraseDictionary[i]);
delete m_phraseDictionary[i];
}
m_phraseDictionary.clear();
for (size_t i=0; i < m_reorderModels.size(); ++i) {
ScoreComponentCollection::UnregisterScoreProducer(m_reorderModels[i]);
delete m_reorderModels[i];
}
m_reorderModels.clear();
for (LMList::const_iterator k = m_languageModel.begin(); k != m_languageModel.end(); ++k) {
ScoreComponentCollection::UnregisterScoreProducer(*k);
// delete *k;
}
m_languageModel.CleanUp();
ScoreComponentCollection::UnregisterScoreProducer(m_bleuScoreFeature);
ScoreComponentCollection::UnregisterScoreProducer(m_unknownWordPenaltyProducer);
m_inputFactorOrder.clear();
m_outputFactorOrder.clear();
ScoreComponentCollection::ResetCounter();
ScoreProducer::ResetDescriptionCounts();
}
bool StaticData::LoadData(Parameter *parameter)
{
ResetUserTime();
@ -282,6 +325,8 @@ bool StaticData::LoadData(Parameter *parameter)
m_useTransOptCache = false;
}
std::cerr << "transOptCache: " << m_useTransOptCache << std::endl;
std::cerr << "transOptCache max size: " << m_transOptCacheMaxSize << std::endl;
//input factors
const vector<string> &inputFactorVector = m_parameter->GetParam("input-factors");
@ -343,6 +388,7 @@ bool StaticData::LoadData(Parameter *parameter)
// settings for pruning
m_maxHypoStackSize = (m_parameter->GetParam("stack").size() > 0)
? Scan<size_t>(m_parameter->GetParam("stack")[0]) : DEFAULT_MAX_HYPOSTACK_SIZE;
std::cerr << "max stack size: " << m_maxHypoStackSize << std::endl;
m_minHypoStackDiversity = 0;
if (m_parameter->GetParam("stack-diversity").size() > 0) {
if (m_maxDistortion > 15) {
@ -366,6 +412,10 @@ bool StaticData::LoadData(Parameter *parameter)
TransformScore(Scan<float>(m_parameter->GetParam("translation-option-threshold")[0]))
: TransformScore(DEFAULT_TRANSLATION_OPTION_THRESHOLD);
std::cerr << "beamwidth: " << m_beamWidth << std::endl;
std::cerr << "early discarding threshold: " << m_earlyDiscardingThreshold << std::endl;
std::cerr << "translOptThreshold: " << m_translationOptionThreshold << std::endl;
m_maxNoTransOptPerCoverage = (m_parameter->GetParam("max-trans-opt-per-coverage").size() > 0)
? Scan<size_t>(m_parameter->GetParam("max-trans-opt-per-coverage")[0]) : DEFAULT_MAX_TRANS_OPT_SIZE;
@ -1697,7 +1747,7 @@ bool StaticData::LoadPhrasePairFeature()
size_t sourceFactorId = Scan<size_t>(factors[0]);
size_t targetFactorId = Scan<size_t>(factors[1]);
bool simple = true, sourceContext = false, ignorePunctuation = true;
bool simple = true, sourceContext = false, ignorePunctuation = false;
if (tokens.size() >= 3) {
simple = Scan<size_t>(tokens[1]);
sourceContext = Scan<size_t>(tokens[2]);
@ -1837,7 +1887,7 @@ bool StaticData::LoadWordTranslationFeature()
FactorType factorIdSource = Scan<size_t>(factors[0]);
FactorType factorIdTarget = Scan<size_t>(factors[1]);
bool simple = true, sourceTrigger = false, targetTrigger = false, ignorePunctuation = true;
bool simple = true, sourceTrigger = false, targetTrigger = false, ignorePunctuation = false;
if (tokens.size() >= 4) {
simple = Scan<size_t>(tokens[1]);
sourceTrigger = Scan<size_t>(tokens[2]);

View File

@ -302,11 +302,19 @@ public:
//! Load data into static instance. This function is required as LoadData() is not const
static bool LoadDataStatic(Parameter *parameter) {
std::cerr << "Load static data.." << std::endl;
return s_instance.LoadData(parameter);
std::cerr << "done.." << std::endl;
}
static void ClearDataStatic() {
std::cerr << "Clear static data.." << std::endl;
s_instance.ClearData();
std::cerr << "done.." << std::endl;
}
//! Main function to load everything. Also initialize the Parameter object
bool LoadData(Parameter *parameter);
void ClearData();
const PARAM_VEC &GetParam(const std::string &paramName) const {
return m_parameter->GetParam(paramName);
@ -477,6 +485,10 @@ public:
LMList GetLMList() const {
return m_languageModel;
}
WordPenaltyProducer* GetWordPenaltyProducer() const {
assert(m_wordPenaltyProducers.size() >= 1);
return m_wordPenaltyProducers[0];
}
size_t GetNumInputScores() const {
return m_numInputScores;
}

View File

@ -145,7 +145,6 @@ void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
const TranslationSystem& system = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT);
std::vector<float> weightsT = system.GetTranslationWeights();
weightWP = system.GetWeightWordPenalty();
VERBOSE(1, cerr << "weightWP: " << weightWP << std::endl);
//m_transScore = std::inner_product(scoreVector.begin(), scoreVector.end(), weightT.begin(), 0.0f);
m_transScore = std::inner_product(scoreVector.begin(), scoreVector.end(), weightsT.begin(), 0.0f);

View File

@ -161,7 +161,9 @@ namespace Moses {
}
float TranslationSystem::GetWeightWordPenalty() const {
return StaticData::Instance().GetWeight(m_wpProducer);
float weightWP = StaticData::Instance().GetWeight(m_wpProducer);
VERBOSE(1, "Read weightWP from translation sytem: " << weightWP << std::endl);
return weightWP;
}
float TranslationSystem::GetWeightUnknownWordPenalty() const {
@ -175,10 +177,10 @@ namespace Moses {
std::vector<float> TranslationSystem::GetTranslationWeights() const {
std::vector<float> weights = StaticData::Instance().GetWeights(GetTranslationScoreProducer());
VERBOSE(1, cerr << "Read weightT from translation sytem.. ");
VERBOSE(1, "Read weightT from translation sytem.. ");
for (size_t i = 0; i < weights.size(); ++i)
VERBOSE(1, std::cerr << weights[i] << " ");
VERBOSE(1, std::cerr << std::endl);
VERBOSE(1, weights[i] << " ");
VERBOSE(1, std::endl);
return weights;
}
};
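This last hunk ties back to the "remove caching" part of the commit message: instead of keeping the word-penalty weight and translation weights in cached members, the accessors read them from StaticData on every call, which matters during online (MIRA) tuning because the weights change between updates. A minimal sketch of the difference, with a hypothetical global weight store standing in for StaticData (not the Moses API):

#include <iostream>
#include <map>
#include <string>

// Hypothetical weight store standing in for StaticData (not the Moses API).
std::map<std::string, float> g_weights;

// Cached style: the weight is copied into a member at construction time and
// goes stale as soon as the optimiser writes a new value into the store.
struct CachedAccess {
  float m_weightWP;
  CachedAccess() : m_weightWP(g_weights["WordPenalty"]) {}
  float GetWeightWordPenalty() const { return m_weightWP; }
};

// Fresh style (the behaviour suggested by this commit): read the current weight on every call.
struct FreshAccess {
  float GetWeightWordPenalty() const { return g_weights["WordPenalty"]; }
};

int main() {
  g_weights["WordPenalty"] = -1.0f;
  CachedAccess cached;
  FreshAccess fresh;
  g_weights["WordPenalty"] = -0.5f;  // an online weight update
  std::cout << "cached: " << cached.GetWeightWordPenalty()   // still -1.0
            << ", fresh: " << fresh.GetWeightWordPenalty()   // -0.5
            << std::endl;
  return 0;
}

With the cached accessor the update made after construction is invisible; the fresh accessor always reflects the current weights, which is what the tuning loop needs.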