Specify 5 different update schemes

This commit is contained in:
Eva 2012-03-05 00:55:36 +00:00
parent eb69065fa5
commit ba7da1042d
6 changed files with 95 additions and 7 deletions

View File

@ -125,6 +125,7 @@ int main(int argc, char** argv) {
bool boost;
bool decode_hope, decode_fear, decode_model;
string decode_filename;
size_t update_scheme;
po::options_description desc("Allowed options");
desc.add_options()
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
@ -198,6 +199,7 @@ int main(int argc, char** argv) {
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
("update-scheme", po::value<size_t>(&update_scheme)->default_value(1), "Update scheme, default: 1")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
@ -319,7 +321,7 @@ int main(int argc, char** argv) {
cerr << "Optimising using Mira" << endl;
cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl;
}
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost);
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost, update_scheme);
learning_rate = mira_learning_rate;
perceptron_update = false;
} else if (learner == "perceptron") {

View File

@ -359,9 +359,50 @@ size_t MiraOptimiser::updateWeightsAnalytically(
// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
ScoreComponentCollection featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
// cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
// scenario 1: reward only-hope, penalize only-fear
// scenario 2: reward all-hope, penalize only-fear
// scenario 3: reward all-hope
// scenario 4: reward strongly only-hope, reward mildly all-hope
// scenario 5: reward strongly only-hope, reward mildly all-hope, penalize only-fear
ScoreComponentCollection featureValueDiff;
switch (m_update_scheme) {
case 2:
// values: 1: all-hope, -1: only-fear
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
featureValueDiff.SparsePlusEquals(featureValuesHope);
//max: 1 (set all 2 to 1)
featureValueDiff.CapMax(1);
break;
case 3:
// values: 1: all-hope
featureValueDiff = featureValuesHope;
break;
case 4:
// values: 2: only-hope, 1: both
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
featureValueDiff.SparsePlusEquals(featureValuesHope);
// min: 0 (set all -1 to 0)
featureValueDiff.CapMin(0);
break;
case 5:
// values: 2: only-hope, 1: both, -1: only-fear
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
featureValueDiff.SparsePlusEquals(featureValuesHope);
break;
case 1:
default:
// values: 1: only-hope, -1: only-fear
featureValueDiff = featureValuesHope;
featureValueDiff.MinusEquals(featureValuesFear);
break;
}
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float modelScoreDiff = modelScoreHope - modelScoreFear;
float loss = bleuScoreHope - bleuScoreFear;

View File

@ -65,14 +65,16 @@ namespace Mira {
MiraOptimiser() :
Optimiser() { }
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, size_t scale_update, float margin_slack, bool boost) :
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin,
size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
Optimiser(),
m_onlyViolatedConstraints(onlyViolatedConstraints),
m_slack(slack),
m_scale_margin(scale_margin),
m_scale_update(scale_update),
m_margin_slack(margin_slack),
m_boost(boost) { }
m_boost(boost),
m_update_scheme(update_scheme) { }
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& weightUpdate,
@ -158,6 +160,9 @@ namespace Mira {
// boosting of updates on misranked candidates
bool m_boost;
// select 1 of 5 different update schemes
size_t m_update_scheme;
};
}

View File

@ -235,6 +235,18 @@ namespace Moses {
operator*=(factor);
}
// Clamp feature values from above: any entry whose value exceeds maxValue
// is lowered to exactly maxValue; entries at or below the cap are untouched.
// NOTE(review): iterates cbegin()..cend() and writes back via set(); assumes
// that range covers the same (sparse) features set() addresses — confirm
// against FVector's iterator contract before extending this.
void FVector::capMax(FValue maxValue) {
	for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
		if (entry->second > maxValue) {
			set(entry->first, maxValue);
		}
	}
}
// Clamp feature values from below: any entry whose value falls under minValue
// is raised to exactly minValue; entries at or above the floor are untouched.
// Mirror image of capMax.
void FVector::capMin(FValue minValue) {
	for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
		if (entry->second < minValue) {
			set(entry->first, minValue);
		}
	}
}
// Assign a value to the named feature in the sparse map, inserting the
// entry if it is not already present (map operator[] semantics).
void FVector::set(const FName& name, const FValue& value) {
m_features[name] = value;
}
@ -268,6 +280,12 @@ namespace Moses {
return *this;
}
// add only sparse features
void FVector::sparsePlusEquals(const FVector& rhs) {
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
set(i->first, get(i->first) + i->second);
}
FVector& FVector::operator-= (const FVector& rhs) {
if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
resize(rhs.m_coreFeatures.size());
@ -423,5 +441,4 @@ namespace Moses {
return lhs.inner_product(rhs);
}
}
}

View File

@ -185,6 +185,11 @@ namespace Moses {
//scale so that abs. value is less than maxvalue
void thresholdScale(float maxValue );
void capMax(FValue maxValue);
void capMin(FValue minValue);
void sparsePlusEquals(const FVector& rhs);
#ifdef MPI_ENABLE
friend class boost::serialization::access;
#endif

View File

@ -140,6 +140,12 @@ public:
m_scores += rhs.m_scores;
}
// add only sparse features
void SparsePlusEquals(const ScoreComponentCollection& rhs)
{
// Delegates to FVector::sparsePlusEquals, which adds rhs's sparse
// feature values into this collection's underlying feature vector.
m_scores.sparsePlusEquals(rhs.m_scores);
}
void PlusEquals(const FVector& scores)
{
m_scores += scores;
@ -303,6 +309,18 @@ public:
m_scores.thresholdScale(maxValue);
}
void CapMax(float maxValue)
{
// Clamp all sparse feature values to at most maxValue
// (delegates to FVector::capMax, which only lowers values above the cap).
m_scores.capMax(maxValue);
}
void CapMin(float minValue)
{
// Clamp all sparse feature values to at least minValue
// (delegates to FVector::capMin, which only raises values below the floor).
m_scores.capMin(minValue);
}
//! if a ScoreProducer produces a single score (for example, a language model score)
//! this will return it. If not, this method will throw
float GetScoreForProducer(const ScoreProducer* sp) const