specify 5 different update schemes

2024-12-28 14:32:38 +03:00 · 2012-03-05 00:55:36 +00:00 · 2012-03-05 00:55:36 +00:00 · ba7da1042d
commit ba7da1042d
parent eb69065fa5
6 changed files with 95 additions and 7 deletions
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@ -125,6 +125,7 @@ int main(int argc, char** argv) {
 	bool boost;
 	bool decode_hope, decode_fear, decode_model;
 	string decode_filename;
+	size_t update_scheme;
 	po::options_description desc("Allowed options");
 	desc.add_options()
 		("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
@ -198,6 +199,7 @@ int main(int argc, char** argv) {
 		("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
 		("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
 		("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
+		("update-scheme", po::value<size_t>(&update_scheme)->default_value(1), "Update scheme, default: 1")
 		("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
 		("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
 		("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
@ -319,7 +321,7 @@ int main(int argc, char** argv) {
 			cerr << "Optimising using Mira" << endl;
 			cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl;
 		}
-		optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost);
+		optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost, update_scheme);
 		learning_rate = mira_learning_rate;
 		perceptron_update = false;
 	} else if (learner == "perceptron") {
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@ -359,9 +359,50 @@ size_t MiraOptimiser::updateWeightsAnalytically(

 // cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
 // cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
-  ScoreComponentCollection featureValueDiff = featureValuesHope;
-  featureValueDiff.MinusEquals(featureValuesFear);
-  //  cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
+
+  // scenario 1: reward only-hope, penalize only-fear
+  // scenario 2: reward all-hope, penalize only-fear
+  // scenario 3: reward all-hope
+  // scenario 4: reward strongly only-hope, reward mildly all-hope
+  // scenario 5: reward strongly only-hope, reward mildly all-hope, penalize only-fear
+
+  ScoreComponentCollection featureValueDiff;
+  switch (m_update_scheme) {
+  case 2:
+	  // values: 1: all-hope, -1: only-fear
+	  featureValueDiff = featureValuesHope;
+	  featureValueDiff.MinusEquals(featureValuesFear);
+	  featureValueDiff.SparsePlusEquals(featureValuesHope);
+	  //max: 1 (set all 2 to 1)
+	  featureValueDiff.CapMax(1);
+	  break;
+  case 3:
+	  // values: 1: all-hope
+	  featureValueDiff = featureValuesHope;
+	  break;
+  case 4:
+	  // values: 2: only-hope, 1: both
+	  featureValueDiff = featureValuesHope;
+	  featureValueDiff.MinusEquals(featureValuesFear);
+	  featureValueDiff.SparsePlusEquals(featureValuesHope);
+	  // min: 0 (set all -1 to 0)
+	  featureValueDiff.CapMin(0);
+	  break;
+  case 5:
+	  // values: 2: only-hope, 1: both, -1: only-fear
+	  featureValueDiff = featureValuesHope;
+	  featureValueDiff.MinusEquals(featureValuesFear);
+	  featureValueDiff.SparsePlusEquals(featureValuesHope);
+	  break;
+  case 1:
+  default:
+	  // values: 1: only-hope, -1: only-fear
+	  featureValueDiff = featureValuesHope;
+	  featureValueDiff.MinusEquals(featureValuesFear);
+	  break;
+  }
+
+  cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
 //  float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
  float modelScoreDiff = modelScoreHope - modelScoreFear;
  float loss = bleuScoreHope - bleuScoreFear;
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@ -65,14 +65,16 @@ namespace Mira {
 	  MiraOptimiser() :
 		  Optimiser() { }

-	  MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, size_t scale_update, float margin_slack, bool boost) :
+	  MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin,
+size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
 		  Optimiser(),
 		  m_onlyViolatedConstraints(onlyViolatedConstraints),
 		  m_slack(slack),
 		  m_scale_margin(scale_margin),
 		  m_scale_update(scale_update),
 		  m_margin_slack(margin_slack),
-		  m_boost(boost) { }
+		  m_boost(boost),
+		  m_update_scheme(update_scheme) { }
   
 	  size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
 	  								Moses::ScoreComponentCollection& weightUpdate,
@ -158,6 +160,9 @@ namespace Mira {

      // boosting of updates on misranked candidates
      bool m_boost;
+
+      // update scheme used by updateWeightsAnalytically: 1-5, any other value acts as scheme 1
+      size_t m_update_scheme;
  };
 }

--- a/moses/src/FeatureVector.cpp
+++ b/moses/src/FeatureVector.cpp
@ -235,6 +235,18 @@ namespace Moses {
    operator*=(factor);
  }

+  void FVector::capMax(FValue maxValue) {
+    // Upper clamp: every entry reached via cbegin()/cend() that exceeds maxValue is reset to it.
+    for (const_iterator feat = cbegin(); feat != cend(); ++feat)
+      if (feat->second > maxValue) set(feat->first, maxValue);
+  }
+
+  void FVector::capMin(FValue minValue) {
+    // Lower clamp: every entry reached via cbegin()/cend() that is under minValue is reset to it.
+    for (const_iterator feat = cbegin(); feat != cend(); ++feat)
+      if (feat->second < minValue) set(feat->first, minValue);
+  }
+
  void FVector::set(const FName& name, const FValue& value) {
    m_features[name] = value;
  }
@ -268,6 +280,12 @@ namespace Moses {
    return *this;
  }
  
+  // Sparse-only addition: each name-keyed entry of rhs is added onto the same-named entry here.
+  void FVector::sparsePlusEquals(const FVector& rhs) {
+    for (const_iterator src = rhs.cbegin(); src != rhs.cend(); ++src)
+      set(src->first, get(src->first) + src->second);
+  }
+
  FVector& FVector::operator-= (const FVector& rhs) {
    if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
      resize(rhs.m_coreFeatures.size());
@ -423,5 +441,4 @@ namespace Moses {
      return lhs.inner_product(rhs);
    }
  }
-	
 }
--- a/moses/src/FeatureVector.h
+++ b/moses/src/FeatureVector.h
@ -185,6 +185,11 @@ namespace Moses {
    //scale so that abs. value is less than maxvalue
    void thresholdScale(float maxValue );

+    void capMax(FValue maxValue);  // lower every sparse feature value above maxValue to maxValue
+    void capMin(FValue minValue);  // raise every sparse feature value below minValue to minValue
+
+    void sparsePlusEquals(const FVector& rhs);  // add only the sparse features of rhs to this vector
+
 #ifdef MPI_ENABLE
    friend class boost::serialization::access;
 #endif  
--- a/moses/src/ScoreComponentCollection.h
+++ b/moses/src/ScoreComponentCollection.h
@ -140,6 +140,12 @@ public:
 	  m_scores += rhs.m_scores;
 	}

+	//! adds only the sparse feature scores of rhs to this collection (delegates to FVector::sparsePlusEquals)
+	void SparsePlusEquals(const ScoreComponentCollection& rhs)
+	{
+		this->m_scores.sparsePlusEquals(rhs.m_scores);
+	}
+
 	void PlusEquals(const FVector& scores)
 	{
 		m_scores += scores;
@ -303,6 +309,18 @@ public:
    m_scores.thresholdScale(maxValue);
 	}

+	//! caps all sparse feature scores at maxValue: anything larger is set to maxValue
+	void CapMax(float maxValue)
+	{
+		this->m_scores.capMax(maxValue);
+	}
+
+	//! floors all sparse feature scores at minValue: anything smaller is set to minValue
+	void CapMin(float minValue)
+	{
+		this->m_scores.capMin(minValue);
+	}
+
 	//! if a ScoreProducer produces a single score (for example, a language model score)
 	//! this will return it.  If not, this method will throw
 	float GetScoreForProducer(const ScoreProducer* sp) const