add parameter --normalise-margin

This commit is contained in:
Eva Hasler 2012-03-06 18:32:02 +00:00
parent 45f04073f4
commit 75e388ad1d
3 changed files with 52 additions and 10 deletions

View File

@ -99,7 +99,7 @@ int main(int argc, char** argv) {
string decoder_settings;
float min_weight_change;
float decrease_learning_rate;
bool normaliseWeights;
bool normaliseWeights, normaliseMargin;
bool print_feature_values;
bool historyOf1best;
bool historyOfOracles;
@ -179,6 +179,7 @@ int main(int argc, char** argv) {
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation")
("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in n-best list")
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("normalise-margin", po::value<bool>(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
@ -322,7 +323,8 @@ int main(int argc, char** argv) {
cerr << "Optimising using Mira" << endl;
cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl;
}
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost, update_scheme);
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update,
margin_slack, boost, update_scheme, normaliseMargin);
learning_rate = mira_learning_rate;
perceptron_update = false;
} else if (learner == "perceptron") {

View File

@ -68,25 +68,31 @@ size_t MiraOptimiser::updateWeights(
// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float modelScoreDiff = oracleModelScores[i] - modelScores[i][j];
float diff = 0;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (loss > (modelScoreDiff + m_margin_slack)) {
diff = loss - (modelScoreDiff + m_margin_slack);
}
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (diff > epsilon) {
violated = true;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}
}
float lossMinusModelScoreDiff = loss - modelScoreDiff;
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_margin_slack);
if (addConstraint) {
if (m_normaliseMargin)
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
all_losses.push_back(loss);
if (violated) {
if (m_normaliseMargin)
diff = (2/(1 + exp(- diff))) - 1;
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}
@ -242,12 +248,18 @@ size_t MiraOptimiser::updateWeightsHopeFear(
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_margin_slack);
if (addConstraint) {
if (m_normaliseMargin)
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
modelScoreDiffs.push_back(modelScoreDiff);
all_losses.push_back(loss);
if (violated) {
if (m_normaliseMargin)
diff = (2/(1 + exp(- diff))) - 1;
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}
@ -434,6 +446,12 @@ size_t MiraOptimiser::updateWeightsAnalytically(
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
if (diff > epsilon) {
// squash it between 0 and 1
//diff = tanh(diff);
//diff = (2/(1 + pow(2,- diff))) - 1;
if (m_normaliseMargin)
diff = (2/(1 + exp(- diff))) - 1;
// constraint violated
oldDistanceFromOptimum += diff;
constraintViolatedBefore = true;
@ -562,12 +580,18 @@ size_t MiraOptimiser::updateWeightsRankModel(
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
if (m_normaliseMargin)
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
modelScoreDiffs.push_back(modelScoreDiff);
all_losses.push_back(loss);
if (violated) {
if (m_normaliseMargin)
diff = (2/(1 + exp(- diff))) - 1;
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}
@ -706,12 +730,18 @@ size_t MiraOptimiser::updateWeightsHopeFearAndRankModel(
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
if (m_normaliseMargin)
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
modelScoreDiffs.push_back(modelScoreDiff);
all_losses.push_back(loss);
if (violated) {
if (m_normaliseMargin)
diff = (2/(1 + exp(- diff))) - 1;
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}
@ -756,12 +786,18 @@ size_t MiraOptimiser::updateWeightsHopeFearAndRankModel(
float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (addConstraint) {
if (m_normaliseMargin)
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
modelScoreDiffs.push_back(modelScoreDiff);
all_losses.push_back(loss);
if (violated) {
if (m_normaliseMargin)
diff = (2/(1 + exp(- diff))) - 1;
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}

View File

@ -66,7 +66,7 @@ namespace Mira {
Optimiser() { }
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin,
size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
size_t scale_update, float margin_slack, bool boost, size_t update_scheme, bool normaliseMargin) :
Optimiser(),
m_onlyViolatedConstraints(onlyViolatedConstraints),
m_slack(slack),
@ -74,7 +74,8 @@ size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
m_scale_update(scale_update),
m_margin_slack(margin_slack),
m_boost(boost),
m_update_scheme(update_scheme) { }
m_update_scheme(update_scheme),
m_normaliseMargin(normaliseMargin) { }
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& weightUpdate,
@ -163,6 +164,9 @@ size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
// select 1 of 5 different update schemes
size_t m_update_scheme;
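// if set, squash margins/violations with 2/(1+exp(-x)) - 1: result lies in (-1, 1),
// and in (0, 1) for positive inputs such as a violated constraint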
bool m_normaliseMargin;
};
}