mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-19 23:27:46 +03:00
add parameter --normalise-margin
This commit is contained in:
parent
45f04073f4
commit
75e388ad1d
@ -99,7 +99,7 @@ int main(int argc, char** argv) {
|
||||
string decoder_settings;
|
||||
float min_weight_change;
|
||||
float decrease_learning_rate;
|
||||
bool normaliseWeights;
|
||||
bool normaliseWeights, normaliseMargin;
|
||||
bool print_feature_values;
|
||||
bool historyOf1best;
|
||||
bool historyOfOracles;
|
||||
@ -179,6 +179,7 @@ int main(int argc, char** argv) {
|
||||
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation")
|
||||
("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in n-best list")
|
||||
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
|
||||
("normalise-margin", po::value<bool>(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1")
|
||||
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
|
||||
("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
|
||||
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
|
||||
@ -322,7 +323,8 @@ int main(int argc, char** argv) {
|
||||
cerr << "Optimising using Mira" << endl;
|
||||
cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl;
|
||||
}
|
||||
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update, margin_slack, boost, update_scheme);
|
||||
optimiser = new MiraOptimiser(onlyViolatedConstraints, slack, scale_margin, scale_update,
|
||||
margin_slack, boost, update_scheme, normaliseMargin);
|
||||
learning_rate = mira_learning_rate;
|
||||
perceptron_update = false;
|
||||
} else if (learner == "perceptron") {
|
||||
|
@ -68,25 +68,31 @@ size_t MiraOptimiser::updateWeights(
|
||||
// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
|
||||
float modelScoreDiff = oracleModelScores[i] - modelScores[i][j];
|
||||
float diff = 0;
|
||||
if (loss > (modelScoreDiff + m_margin_slack)) {
|
||||
diff = loss - (modelScoreDiff + m_margin_slack);
|
||||
}
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
|
||||
if (loss > (modelScoreDiff + m_margin_slack)) {
|
||||
diff = loss - (modelScoreDiff + m_margin_slack);
|
||||
}
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
|
||||
|
||||
if (diff > epsilon) {
|
||||
violated = true;
|
||||
}
|
||||
else if (m_onlyViolatedConstraints) {
|
||||
addConstraint = false;
|
||||
}
|
||||
}
|
||||
|
||||
float lossMinusModelScoreDiff = loss - modelScoreDiff;
|
||||
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_margin_slack);
|
||||
if (addConstraint) {
|
||||
if (m_normaliseMargin)
|
||||
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
|
||||
|
||||
featureValueDiffs.push_back(featureValueDiff);
|
||||
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
|
||||
all_losses.push_back(loss);
|
||||
|
||||
if (violated) {
|
||||
if (m_normaliseMargin)
|
||||
diff = (2/(1 + exp(- diff))) - 1;
|
||||
|
||||
++violatedConstraintsBefore;
|
||||
oldDistanceFromOptimum += diff;
|
||||
}
|
||||
@ -242,12 +248,18 @@ size_t MiraOptimiser::updateWeightsHopeFear(
|
||||
|
||||
float lossMinusModelScoreDiff = loss - (modelScoreDiff + m_margin_slack);
|
||||
if (addConstraint) {
|
||||
if (m_normaliseMargin)
|
||||
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
|
||||
|
||||
featureValueDiffs.push_back(featureValueDiff);
|
||||
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
|
||||
modelScoreDiffs.push_back(modelScoreDiff);
|
||||
all_losses.push_back(loss);
|
||||
|
||||
if (violated) {
|
||||
if (m_normaliseMargin)
|
||||
diff = (2/(1 + exp(- diff))) - 1;
|
||||
|
||||
++violatedConstraintsBefore;
|
||||
oldDistanceFromOptimum += diff;
|
||||
}
|
||||
@ -434,6 +446,12 @@ size_t MiraOptimiser::updateWeightsAnalytically(
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " + " << m_margin_slack << " >= " << loss << " (current violation: " << diff << ")" << endl;
|
||||
|
||||
if (diff > epsilon) {
|
||||
// squash it between 0 and 1
|
||||
//diff = tanh(diff);
|
||||
//diff = (2/(1 + pow(2,- diff))) - 1;
|
||||
if (m_normaliseMargin)
|
||||
diff = (2/(1 + exp(- diff))) - 1;
|
||||
|
||||
// constraint violated
|
||||
oldDistanceFromOptimum += diff;
|
||||
constraintViolatedBefore = true;
|
||||
@ -562,12 +580,18 @@ size_t MiraOptimiser::updateWeightsRankModel(
|
||||
|
||||
float lossMinusModelScoreDiff = loss - modelScoreDiff;
|
||||
if (addConstraint) {
|
||||
if (m_normaliseMargin)
|
||||
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
|
||||
|
||||
featureValueDiffs.push_back(featureValueDiff);
|
||||
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
|
||||
modelScoreDiffs.push_back(modelScoreDiff);
|
||||
all_losses.push_back(loss);
|
||||
|
||||
if (violated) {
|
||||
if (m_normaliseMargin)
|
||||
diff = (2/(1 + exp(- diff))) - 1;
|
||||
|
||||
++violatedConstraintsBefore;
|
||||
oldDistanceFromOptimum += diff;
|
||||
}
|
||||
@ -706,12 +730,18 @@ size_t MiraOptimiser::updateWeightsHopeFearAndRankModel(
|
||||
|
||||
float lossMinusModelScoreDiff = loss - modelScoreDiff;
|
||||
if (addConstraint) {
|
||||
if (m_normaliseMargin)
|
||||
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
|
||||
|
||||
featureValueDiffs.push_back(featureValueDiff);
|
||||
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
|
||||
modelScoreDiffs.push_back(modelScoreDiff);
|
||||
all_losses.push_back(loss);
|
||||
|
||||
if (violated) {
|
||||
if (m_normaliseMargin)
|
||||
diff = (2/(1 + exp(- diff))) - 1;
|
||||
|
||||
++violatedConstraintsBefore;
|
||||
oldDistanceFromOptimum += diff;
|
||||
}
|
||||
@ -756,12 +786,18 @@ size_t MiraOptimiser::updateWeightsHopeFearAndRankModel(
|
||||
|
||||
float lossMinusModelScoreDiff = loss - modelScoreDiff;
|
||||
if (addConstraint) {
|
||||
if (m_normaliseMargin)
|
||||
lossMinusModelScoreDiff = (2/(1 + exp(- lossMinusModelScoreDiff))) - 1;
|
||||
|
||||
featureValueDiffs.push_back(featureValueDiff);
|
||||
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
|
||||
modelScoreDiffs.push_back(modelScoreDiff);
|
||||
all_losses.push_back(loss);
|
||||
|
||||
if (violated) {
|
||||
if (m_normaliseMargin)
|
||||
diff = (2/(1 + exp(- diff))) - 1;
|
||||
|
||||
++violatedConstraintsBefore;
|
||||
oldDistanceFromOptimum += diff;
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ namespace Mira {
|
||||
Optimiser() { }
|
||||
|
||||
MiraOptimiser(bool onlyViolatedConstraints, float slack, size_t scale_margin,
|
||||
size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
|
||||
size_t scale_update, float margin_slack, bool boost, size_t update_scheme, bool normaliseMargin) :
|
||||
Optimiser(),
|
||||
m_onlyViolatedConstraints(onlyViolatedConstraints),
|
||||
m_slack(slack),
|
||||
@ -74,7 +74,8 @@ size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
|
||||
m_scale_update(scale_update),
|
||||
m_margin_slack(margin_slack),
|
||||
m_boost(boost),
|
||||
m_update_scheme(update_scheme) { }
|
||||
m_update_scheme(update_scheme),
|
||||
m_normaliseMargin(normaliseMargin) { }
|
||||
|
||||
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
@ -163,6 +164,9 @@ size_t scale_update, float margin_slack, bool boost, size_t update_scheme) :
|
||||
|
||||
// select 1 of 5 different update schemes
|
||||
size_t m_update_scheme;
|
||||
|
||||
// squash margin between 0 and 1
|
||||
bool m_normaliseMargin;
|
||||
};
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user