mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
specify 5 different update schemes
This commit is contained in:
parent
eb69065fa5
commit
ba7da1042d
@ -125,6 +125,7 @@ int main(int argc, char** argv) {
|
||||
bool boost;
|
||||
bool decode_hope, decode_fear, decode_model;
|
||||
string decode_filename;
|
||||
size_t update_scheme;
|
||||
po::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
|
||||
@ -198,6 +199,7 @@ int main(int argc, char** argv) {
|
||||
("slack-step", po::value<float>(&slack_step)->default_value(0), "Increase slack from epoch to epoch by the value provided")
|
||||
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
|
||||
("threads", po::value<int>(&threadcount)->default_value(1), "Number of threads used")
|
||||
("update-scheme", po::value<size_t>(&update_scheme)->default_value(1), "Update scheme, default: 1")
|
||||
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
|
||||
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
|
||||
("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
|
||||
@ -319,7 +321,7 @@ int main(int argc, char** argv) {
|
||||
cerr << "Optimising using Mira" << endl;
|
||||
cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl;
|
||||
}
|
||||
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost);
|
||||
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost, update_scheme);
|
||||
learning_rate = mira_learning_rate;
|
||||
perceptron_update = false;
|
||||
} else if (learner == "perceptron") {
|
||||
|
@ -359,9 +359,50 @@ size_t MiraOptimiser::updateWeightsAnalytically(
|
||||
|
||||
// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl;
|
||||
// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl;
|
||||
ScoreComponentCollection featureValueDiff = featureValuesHope;
|
||||
featureValueDiff.MinusEquals(featureValuesFear);
|
||||
// cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
|
||||
|
||||
// scenario 1: reward only-hope, penalize only-fear
|
||||
// scenario 2: reward all-hope, penalize only-fear
|
||||
// scenario 3: reward all-hope
|
||||
// scenario 4: reward strongly only-hope, reward mildly all-hope
|
||||
// scenario 5: reward strongly only-hope, reward mildly all-hope, penalize only-fear
|
||||
|
||||
ScoreComponentCollection featureValueDiff;
|
||||
switch (m_update_scheme) {
|
||||
case 2:
|
||||
// values: 1: all-hope, -1: only-fear
|
||||
featureValueDiff = featureValuesHope;
|
||||
featureValueDiff.MinusEquals(featureValuesFear);
|
||||
featureValueDiff.SparsePlusEquals(featureValuesHope);
|
||||
//max: 1 (set all 2 to 1)
|
||||
featureValueDiff.CapMax(1);
|
||||
break;
|
||||
case 3:
|
||||
// values: 1: all-hope
|
||||
featureValueDiff = featureValuesHope;
|
||||
break;
|
||||
case 4:
|
||||
// values: 2: only-hope, 1: both
|
||||
featureValueDiff = featureValuesHope;
|
||||
featureValueDiff.MinusEquals(featureValuesFear);
|
||||
featureValueDiff.SparsePlusEquals(featureValuesHope);
|
||||
// min: 0 (set all -1 to 0)
|
||||
featureValueDiff.CapMin(0);
|
||||
break;
|
||||
case 5:
|
||||
// values: 2: only-hope, 1: both, -1: only-fear
|
||||
featureValueDiff = featureValuesHope;
|
||||
featureValueDiff.MinusEquals(featureValuesFear);
|
||||
featureValueDiff.SparsePlusEquals(featureValuesHope);
|
||||
break;
|
||||
case 1:
|
||||
default:
|
||||
// values: 1: only-hope, -1: only-fear
|
||||
featureValueDiff = featureValuesHope;
|
||||
featureValueDiff.MinusEquals(featureValuesFear);
|
||||
break;
|
||||
}
|
||||
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
|
||||
// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
|
||||
float modelScoreDiff = modelScoreHope - modelScoreFear;
|
||||
float loss = bleuScoreHope - bleuScoreFear;
|
||||
|
@ -65,14 +65,16 @@ namespace Mira {
|
||||
MiraOptimiser() :
|
||||
Optimiser() { }
|
||||
|
||||
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin, size_t scale_update, float margin_slack, bool boost) :
|
||||
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin,
|
||||
size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
|
||||
Optimiser(),
|
||||
m_onlyViolatedConstraints(onlyViolatedConstraints),
|
||||
m_slack(slack),
|
||||
m_scale_margin(scale_margin),
|
||||
m_scale_update(scale_update),
|
||||
m_margin_slack(margin_slack),
|
||||
m_boost(boost) { }
|
||||
m_boost(boost),
|
||||
m_update_scheme(update_scheme) { }
|
||||
|
||||
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
@ -158,6 +160,9 @@ namespace Mira {
|
||||
|
||||
// boosting of updates on misranked candidates
|
||||
bool m_boost;
|
||||
|
||||
// select 1 of 5 different update schemes
|
||||
size_t m_update_scheme;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -235,6 +235,18 @@ namespace Moses {
|
||||
operator*=(factor);
|
||||
}
|
||||
|
||||
// Upper clamp: walk the entries exposed by cbegin()/cend() and overwrite,
// via set(), every value that is strictly greater than maxValue with
// maxValue itself. Values less than or equal to maxValue are left as they are.
void FVector::capMax(FValue maxValue) {
  const_iterator entry = cbegin();
  while (entry != cend()) {
    // Only keys that already exist are rewritten, so no new entry is created.
    if (maxValue < entry->second) {
      set(entry->first, maxValue);
    }
    ++entry;
  }
}
|
||||
|
||||
// Lower clamp: walk the entries exposed by cbegin()/cend() and overwrite,
// via set(), every value that is strictly less than minValue with
// minValue itself. Values greater than or equal to minValue are left as they are.
void FVector::capMin(FValue minValue) {
  const_iterator entry = cbegin();
  while (entry != cend()) {
    // Only keys that already exist are rewritten, so no new entry is created.
    if (minValue > entry->second) {
      set(entry->first, minValue);
    }
    ++entry;
  }
}
|
||||
|
||||
void FVector::set(const FName& name, const FValue& value) {
|
||||
m_features[name] = value;
|
||||
}
|
||||
@ -268,6 +280,12 @@ namespace Moses {
|
||||
return *this;
|
||||
}
|
||||
|
||||
// add only sparse features
|
||||
void FVector::sparsePlusEquals(const FVector& rhs) {
|
||||
for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
|
||||
set(i->first, get(i->first) + i->second);
|
||||
}
|
||||
|
||||
FVector& FVector::operator-= (const FVector& rhs) {
|
||||
if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
|
||||
resize(rhs.m_coreFeatures.size());
|
||||
@ -423,5 +441,4 @@ namespace Moses {
|
||||
return lhs.inner_product(rhs);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -185,6 +185,11 @@ namespace Moses {
|
||||
//scale so that abs. value is less than maxvalue
|
||||
void thresholdScale(float maxValue );
|
||||
|
||||
// Overwrite every entry whose value is greater than maxValue with maxValue.
void capMax(FValue maxValue);
|
||||
// Overwrite every entry whose value is less than minValue with minValue.
void capMin(FValue minValue);
|
||||
|
||||
// Add the sparse features of rhs to this vector, one feature at a time.
void sparsePlusEquals(const FVector& rhs);
|
||||
|
||||
#ifdef MPI_ENABLE
|
||||
friend class boost::serialization::access;
|
||||
#endif
|
||||
|
@ -140,6 +140,12 @@ public:
|
||||
m_scores += rhs.m_scores;
|
||||
}
|
||||
|
||||
// add only sparse features
|
||||
void SparsePlusEquals(const ScoreComponentCollection& rhs)
|
||||
{
|
||||
m_scores.sparsePlusEquals(rhs.m_scores);
|
||||
}
|
||||
|
||||
void PlusEquals(const FVector& scores)
|
||||
{
|
||||
m_scores += scores;
|
||||
@ -303,6 +309,18 @@ public:
|
||||
m_scores.thresholdScale(maxValue);
|
||||
}
|
||||
|
||||
void CapMax(float maxValue)
|
||||
{
|
||||
// cap all sparse features to maxValue
|
||||
m_scores.capMax(maxValue);
|
||||
}
|
||||
|
||||
void CapMin(float minValue)
|
||||
{
|
||||
// cap all sparse features to minValue
|
||||
m_scores.capMin(minValue);
|
||||
}
|
||||
|
||||
//! if a ScoreProducer produces a single score (for example, a language model score)
|
||||
//! this will return it. If not, this method will throw
|
||||
float GetScoreForProducer(const ScoreProducer* sp) const
|
||||
|
Loading…
Reference in New Issue
Block a user