Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-11-10 00:47:31 +03:00)
introduce parameter --update-after-epoch
git-svn-id: http://svn.statmt.org/repository/mira@3845 cc96ff50-19ce-11e0-b349-13d7f0bd23df
commit d04066ee0e
parent 41d2d28408
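For orientation before the hunks below: with --update-after-epoch set, the optimiser no longer applies each computed update to the weight vector immediately; it adds the update to an accumulator (the new m_accumulatedUpdates member) and the training loop applies the accumulated sum once per epoch. The following is a minimal C++ sketch of that control flow only; the Weights typedef, the SketchOptimiser class and the applyAccumulated() helper are illustrative stand-ins for Moses::ScoreComponentCollection and the real optimiser, not code from this commit.

#include <algorithm>
#include <cstdio>
#include <vector>

// Illustrative stand-in for Moses::ScoreComponentCollection.
typedef std::vector<float> Weights;

static void plusEquals(Weights& a, const Weights& b) {
  for (size_t i = 0; i < a.size(); ++i) a[i] += b[i];
}

struct SketchOptimiser {
  Weights accumulated;  // plays the role of m_accumulatedUpdates
  explicit SketchOptimiser(size_t dim) : accumulated(dim, 0.0f) {}

  // One update step: apply immediately, or defer when the new flag is set.
  void update(Weights& weights, const Weights& totalUpdate, bool update_after_epoch) {
    if (update_after_epoch)
      plusEquals(accumulated, totalUpdate);  // collect; weights stay untouched
    else
      plusEquals(weights, totalUpdate);      // previous behaviour: apply right away
  }

  // Called once per epoch by the training loop when the flag is set.
  void applyAccumulated(Weights& weights) {
    plusEquals(weights, accumulated);
    std::fill(accumulated.begin(), accumulated.end(), 0.0f);  // cf. resetAccumulatedUpdates()
  }
};

int main() {
  Weights w(2, 0.0f);
  SketchOptimiser opt(2);
  Weights u1, u2;
  u1.push_back(0.1f);  u1.push_back(-0.2f);
  u2.push_back(0.3f);  u2.push_back(0.1f);
  opt.update(w, u1, true);   // deferred
  opt.update(w, u2, true);   // deferred
  opt.applyAccumulated(w);   // weights change only here, once per epoch
  std::printf("w = (%.2f, %.2f)\n", w[0], w[1]);
  return 0;
}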
@@ -101,7 +101,7 @@ namespace Mira {
vector< float>& bleuScores,
bool oracle,
bool distinct,
size_t rank)
size_t rank)
{
StaticData &staticData = StaticData::InstanceNonConst();

@@ -138,7 +138,7 @@ namespace Mira {

//std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
float scoreWithoutBleu = path.GetTotalScore() - bleuObjectiveWeight * bleuScore;
cerr << "Total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;
cerr << "Rank " << rank << ", total score: " << path.GetTotalScore() << ", Score w/o bleu: " << scoreWithoutBleu << ", Bleu: " << bleuScore << endl;

Phrase bestPhrase = path.GetTargetPhrase();

mira/Main.cpp: 1682 changed lines (file diff suppressed because it is too large)

@@ -7,15 +7,13 @@ using namespace std;
namespace Mira {

int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
const vector< vector<ScoreComponentCollection> >& featureValues,
const vector< vector<float> >& losses,
const vector<std::vector<float> >& bleuScores,
const vector< ScoreComponentCollection>& oracleFeatureValues,
const vector< float> oracleBleuScores,
const vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank) {
const vector<vector<ScoreComponentCollection> >& featureValues,
const vector<vector<float> >& losses,
const vector<std::vector<float> >& bleuScores, const vector<
ScoreComponentCollection>& oracleFeatureValues,
const vector<float> oracleBleuScores, const vector<size_t> sentenceIds,
float learning_rate, float max_sentence_update, size_t rank,
bool update_after_epoch) {

// add every oracle in batch to list of oracles (under certain conditions)
for (size_t i = 0; i < oracleFeatureValues.size(); ++i) {
@@ -35,8 +33,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
m_bleu_of_oracles[sentenceId][j] = oracleBleuScores[j];
updated = true;
break;
}
else if (worstWeightedScore == 0 || currentWeightedScore > worstWeightedScore){
} else if (worstWeightedScore == 0 || currentWeightedScore
> worstWeightedScore) {
worstWeightedScore = currentWeightedScore;
indexOfWorst = j;
}
@@ -47,8 +45,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
if (m_max_number_oracles > m_oracles[sentenceId].size()) {
m_oracles[sentenceId].push_back(oracleFeatureValues[i]);
m_bleu_of_oracles[sentenceId].push_back(oracleBleuScores[i]);
}
else {
} else {
m_oracles[sentenceId][indexOfWorst] = oracleFeatureValues[i];
m_bleu_of_oracles[sentenceId][indexOfWorst] = oracleBleuScores[i];
}
@@ -56,8 +53,8 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}

size_t violatedConstraintsBefore = 0;
vector< ScoreComponentCollection> featureValueDiffs;
vector< float> lossMarginDistances;
vector<ScoreComponentCollection> featureValueDiffs;
vector<float> lossMarginDistances;

// find most violated constraint
float maxViolationLossMarginDistance;
@@ -67,8 +64,9 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
for (size_t i = 0; i < featureValues.size(); ++i) {
size_t sentenceId = sentenceIds[i];
if (m_oracles[sentenceId].size() > 1)
cerr << "Available oracles for source sentence " << sentenceId << ": " << m_oracles[sentenceId].size() << endl;
for (size_t j = 0; j < featureValues[i].size(); ++j) {
cerr << "Available oracles for source sentence " << sentenceId << ": "
<< m_oracles[sentenceId].size() << endl;
for (size_t j = 0; j < featureValues[i].size(); ++j) {
// check if optimisation criterion is violated for one hypothesis and the oracle
// h(e*) >= h(e_ij) + loss(e_ij)
// h(e*) - h(e_ij) >= loss(e_ij)
@@ -84,15 +82,15 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
loss *= log10(bleuScores[i][j]);
}

cerr << "Rank " << rank << ", loss: " << loss << ", model score diff: " << modelScoreDiff << endl;
//cerr << "Rank " << rank << ", loss: " << loss << ", model score diff: "
// << modelScoreDiff << endl;

bool addConstraint = true;
if (modelScoreDiff < loss) {
// constraint violated
++violatedConstraintsBefore;
oldDistanceFromOptimum += (loss - modelScoreDiff);
}
else if (m_onlyViolatedConstraints) {
} else if (m_onlyViolatedConstraints) {
// constraint not violated
addConstraint = false;
}
@@ -100,13 +98,13 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
if (addConstraint) {
float lossMarginDistance = loss - modelScoreDiff;

if (m_accumulateMostViolatedConstraints && !m_pastAndCurrentConstraints) {
if (m_accumulateMostViolatedConstraints
&& !m_pastAndCurrentConstraints) {
if (lossMarginDistance > maxViolationLossMarginDistance) {
maxViolationLossMarginDistance = lossMarginDistance;
maxViolationfeatureValueDiff = featureValueDiff;
}
}
else if (m_pastAndCurrentConstraints) {
} else if (m_pastAndCurrentConstraints) {
if (lossMarginDistance > maxViolationLossMarginDistance) {
maxViolationLossMarginDistance = lossMarginDistance;
maxViolationfeatureValueDiff = featureValueDiff;
@@ -114,8 +112,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,

featureValueDiffs.push_back(featureValueDiff);
lossMarginDistances.push_back(lossMarginDistance);
}
else {
} else {
// Objective: 1/2 * ||w' - w||^2 + C * SUM_1_m[ max_1_n (l_ij - Delta_h_ij.w')]
// To add a constraint for the optimiser for each sentence i and hypothesis j, we need:
// 1. vector Delta_h_ij of the feature value differences (oracle - hypothesis)
@@ -135,22 +132,24 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}
}

cerr << "Number of violated constraints before optimisation: " << violatedConstraintsBefore << endl;
cerr << "Number of violated constraints before optimisation: "
<< violatedConstraintsBefore << endl;
if (featureValueDiffs.size() != 30) {
cerr << "Number of constraints passed to optimiser: " << featureValueDiffs.size() << endl;
cerr << "Number of constraints passed to optimiser: "
<< featureValueDiffs.size() << endl;
}

// run optimisation: compute alphas for all given constraints
vector< float> alphas;
vector<float> alphas;
ScoreComponentCollection totalUpdate;
if (m_accumulateMostViolatedConstraints && !m_pastAndCurrentConstraints) {
m_featureValueDiffs.push_back(maxViolationfeatureValueDiff);
m_lossMarginDistances.push_back(maxViolationLossMarginDistance);

if (m_slack != 0) {
alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances, m_slack);
}
else {
alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances,
m_slack);
} else {
alphas = Hildreth::optimise(m_featureValueDiffs, m_lossMarginDistances);
}

@@ -164,8 +163,7 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
// accumulate update
totalUpdate.PlusEquals(m_featureValueDiffs[k]);
}
}
else if (violatedConstraintsBefore > 0) {
} else if (violatedConstraintsBefore > 0) {
if (m_pastAndCurrentConstraints) {
// add all (most violated) past constraints to the list of current constraints
for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
@@ -179,9 +177,9 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
}

if (m_slack != 0) {
alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances, m_slack);
}
else {
alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances,
m_slack);
} else {
alphas = Hildreth::optimise(featureValueDiffs, lossMarginDistances);
}

@@ -195,59 +193,70 @@ int MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
// accumulate update
totalUpdate.PlusEquals(featureValueDiffs[k]);
}
}
else {
} else {
cerr << "No constraint violated for this batch" << endl;
return 0;
}

// apply learning rate (fixed or flexible)
if (learning_rate != 1) {
cerr << "Rank " << rank << ", update before applying learning rate: " << totalUpdate << endl;
cerr << "Rank " << rank << ", update before applying learning rate: "
<< totalUpdate << endl;
totalUpdate.MultiplyEquals(learning_rate);
cerr << "Rank " << rank << ", update after applying learning rate: " << totalUpdate << endl;
cerr << "Rank " << rank << ", update after applying learning rate: "
<< totalUpdate << endl;
}

// apply threshold scaling
if (max_sentence_update != -1) {
cerr << "Rank " << rank << ", update before scaling to max-sentence-update: " << totalUpdate << endl;
cerr << "Rank " << rank
<< ", update before scaling to max-sentence-update: " << totalUpdate
<< endl;
totalUpdate.ThresholdScaling(max_sentence_update);
cerr << "Rank " << rank << ", update after scaling to max-sentence-update: " << totalUpdate << endl;
cerr << "Rank " << rank
<< ", update after scaling to max-sentence-update: " << totalUpdate
<< endl;
}

// apply update to weight vector
cerr << "Rank " << rank << ", weights before update: " << currWeights << endl;
currWeights.PlusEquals(totalUpdate);
cerr << "Rank " << rank << ", weights after update: " << currWeights << endl;
if (update_after_epoch) {
m_accumulatedUpdates.PlusEquals(totalUpdate);
cerr << "Rank " << rank << ", new accumulated updates:" << m_accumulatedUpdates << endl;
} else {
// apply update to weight vector
cerr << "Rank " << rank << ", weights before update: " << currWeights
<< endl;
currWeights.PlusEquals(totalUpdate);
cerr << "Rank " << rank << ", weights after update: " << currWeights
<< endl;

// sanity check: how many constraints violated after optimisation?
size_t violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t j = 0; j < featureValues[i].size(); ++j) {
ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
featureValueDiff.MinusEquals(featureValues[i][j]);
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float loss = losses[i][j] * m_marginScaleFactor;
if (modelScoreDiff < loss) {
++violatedConstraintsAfter;
newDistanceFromOptimum += (loss - modelScoreDiff);
// sanity check: how many constraints violated after optimisation?
size_t violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t j = 0; j < featureValues[i].size(); ++j) {
ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
featureValueDiff.MinusEquals(featureValues[i][j]);
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
float loss = losses[i][j] * m_marginScaleFactor;
if (modelScoreDiff < loss) {
++violatedConstraintsAfter;
newDistanceFromOptimum += (loss - modelScoreDiff);
}
}
}
}

int constraintChange = violatedConstraintsBefore - violatedConstraintsAfter;
cerr << "Rank " << rank << ", constraint change: " << constraintChange << " (before: " << violatedConstraintsBefore << ")" << endl;
float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
cerr << "Rank " << rank << ", distance change: " << distanceChange << endl;
if (constraintChange < 0 && distanceChange < 0) {
return -1;
int constraintChange = violatedConstraintsBefore - violatedConstraintsAfter;
cerr << "Rank " << rank << ", constraint change: " << constraintChange
<< " (before: " << violatedConstraintsBefore << ")" << endl;
float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
cerr << "Rank " << rank << ", distance change: " << distanceChange << endl;
if (constraintChange < 0 && distanceChange < 0) {
return -1;
}
}

return 0;
}

}

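Restating the optimisation problem from the in-code comment above ("Objective: 1/2 * ||w' - w||^2 + C * SUM_1_m[ max_1_n (l_ij - Delta_h_ij.w')]") in standard notation, reading the sum as running over the m sentences of the batch and the max over the n hypotheses per sentence, with the (margin-scaled) loss of hypothesis e_ij written as l_ij; this gloss is editorial, not part of the commit:

\min_{w'} \; \tfrac{1}{2}\,\lVert w' - w \rVert^{2}
  \;+\; C \sum_{i=1}^{m} \max_{1 \le j \le n} \bigl( \ell_{ij} - \Delta h_{ij} \cdot w' \bigr),
  \qquad \Delta h_{ij} = h(e_i^{*}) - h(e_{ij})

A constraint is violated, and contributes (loss - modelScoreDiff) to the distance from the optimum, exactly when Delta_h_ij . w' < l_ij, which is the check if (modelScoreDiff < loss) performed both before and after the Hildreth call in the hunks above.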
@@ -30,37 +30,21 @@ namespace Mira {
public:
Optimiser() {}
virtual int updateWeights(Moses::ScoreComponentCollection& weights,
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector<std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy,
float learning_rate,
float max_sentence_update,
size_t rank) = 0;
};

class DummyOptimiser : public Optimiser {
public:
virtual int updateWeights(Moses::ScoreComponentCollection& weights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy,
float learning_rate,
float max_sentence_update,
size_t rank)
{ return 0; }
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
const std::vector< float> oracleBleuScores,
const std::vector< size_t> sentenceId,
float learning_rate,
float max_sentence_update,
size_t rank,
bool update_after_epoch) = 0;
};

class Perceptron : public Optimiser {
public:


virtual int updateWeights(Moses::ScoreComponentCollection& weights,
virtual int updateWeights(Moses::ScoreComponentCollection& weights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
@@ -69,7 +53,8 @@ namespace Mira {
const std::vector< size_t> dummy,
float learning_rate,
float max_sentence_update,
size_t rank);
size_t rank,
bool update_after_epoch);
};

class MiraOptimiser : public Optimiser {
@@ -93,7 +78,7 @@ namespace Mira {

~MiraOptimiser() {}

virtual int updateWeights(Moses::ScoreComponentCollection& weights,
virtual int updateWeights(Moses::ScoreComponentCollection& weights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
@@ -102,7 +87,8 @@ namespace Mira {
const std::vector< size_t> sentenceId,
float learning_rate,
float max_sentence_update,
size_t rank);
size_t rank,
bool update_after_epoch);

void setOracleIndices(std::vector<size_t> oracleIndices) {
m_oracleIndices= oracleIndices;
@@ -115,6 +101,14 @@ namespace Mira {
void setMarginScaleFactor(float msf) {
m_marginScaleFactor = msf;
}

Moses::ScoreComponentCollection getAccumulatedUpdates() {
return m_accumulatedUpdates;
}

void resetAccumulatedUpdates() {
m_accumulatedUpdates.ZeroAll();
}

private:
// number of hypotheses used for each nbest list (number of hope, fear, best model translations)
@@ -151,6 +145,8 @@ namespace Mira {
bool m_accumulateMostViolatedConstraints;

bool m_pastAndCurrentConstraints;

Moses::ScoreComponentCollection m_accumulatedUpdates;
};
}

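The accessor pair added above (getAccumulatedUpdates() / resetAccumulatedUpdates(), backed by the new m_accumulatedUpdates member) implies a caller that flushes the accumulated vector once per epoch; the corresponding Main.cpp changes are suppressed earlier in this diff. The following is therefore only a hypothetical caller-side sketch with simplified stand-in types: the method names mirror the declarations above, but the Weights struct, the runEpoch() helper and the loop structure are assumptions, not the committed code.

#include <vector>

// Simplified stand-ins; PlusEquals/ZeroAll mirror ScoreComponentCollection methods used in the diff.
struct Weights {
  std::vector<float> v;
  explicit Weights(size_t dim = 0) : v(dim, 0.0f) {}
  void PlusEquals(const Weights& other) {
    for (size_t i = 0; i < v.size(); ++i) v[i] += other.v[i];
  }
  void ZeroAll() { v.assign(v.size(), 0.0f); }
};

struct SketchMiraOptimiser {
  Weights m_accumulatedUpdates;
  explicit SketchMiraOptimiser(size_t dim) : m_accumulatedUpdates(dim) {}
  Weights getAccumulatedUpdates() { return m_accumulatedUpdates; }
  void resetAccumulatedUpdates() { m_accumulatedUpdates.ZeroAll(); }
  // ... updateWeights(...) would add into m_accumulatedUpdates when
  // update_after_epoch is true, as in MiraOptimiser::updateWeights above ...
};

// Hypothetical epoch loop: decode and call updateWeights() per batch, then
// apply the accumulated sum once and clear it before the next epoch.
void runEpoch(Weights& weights, SketchMiraOptimiser& optimiser, bool update_after_epoch) {
  // ... per-batch decoding and optimiser.updateWeights(...) calls go here ...
  if (update_after_epoch) {
    weights.PlusEquals(optimiser.getAccumulatedUpdates());
    optimiser.resetAccumulatedUpdates();
  }
}

int main() { return 0; }  // sketch only; nothing to run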
@@ -33,7 +33,8 @@ int Perceptron::updateWeights(ScoreComponentCollection& currWeights,
const vector< size_t> dummy,
float learning_rate,
float max_sentence_update,
size_t rank)
size_t rank,
bool update_after_epoch)
{
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t j = 0; j < featureValues[i].size(); ++j) {
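For context on the class whose signature changes here: a structured perceptron update of the general form used in MT tuning moves the weights toward the oracle's features and away from a hypothesis's features, scaled by the learning rate. The sketch below shows that generic rule with plain float vectors; it illustrates the shape of Perceptron::updateWeights, not its exact contents, since the body of the hunk is cut off above.

#include <vector>

// Generic perceptron-style update (illustrative; not necessarily the exact rule in Perceptron.cpp):
// weights += learning_rate * (oracleFeatures - hypothesisFeatures)
void perceptronUpdate(std::vector<float>& weights,
                      const std::vector<float>& oracleFeatures,
                      const std::vector<float>& hypothesisFeatures,
                      float learning_rate) {
  for (size_t k = 0; k < weights.size(); ++k) {
    weights[k] += learning_rate * (oracleFeatures[k] - hypothesisFeatures[k]);
  }
}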