mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
add parameter --delay-updates
This commit is contained in:
parent
1551f44879
commit
a050992abd
@ -121,6 +121,7 @@ int main(int argc, char** argv) {
|
||||
float max_length_dev_reference;
|
||||
float relax_BP;
|
||||
bool stabiliseLength;
|
||||
bool delayUpdates;
|
||||
po::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
|
||||
@ -135,6 +136,7 @@ int main(int argc, char** argv) {
|
||||
("core-weights", po::value<string>(&coreWeightFile), "Weight file containing the core weights (already tuned, have to be non-zero)")
|
||||
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
|
||||
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
|
||||
("delay-updates", po::value<bool>(&delayUpdates)->default_value(false), "Delay all updates until the end of an epoch")
|
||||
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
|
||||
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
|
||||
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
|
||||
@ -413,6 +415,9 @@ int main(int argc, char** argv) {
|
||||
// when length ratio >= 1, set this to true
|
||||
bool fixLength = false;
|
||||
|
||||
// for accumulating delayed updates
|
||||
ScoreComponentCollection delayedWeightUpdates;
|
||||
|
||||
bool stop = false;
|
||||
// int sumStillViolatedConstraints;
|
||||
float *sendbuf, *recvbuf;
|
||||
@ -432,8 +437,10 @@ int main(int argc, char** argv) {
|
||||
size_t weightEpochDump = 0;
|
||||
|
||||
// sum lengths of dev hypothesis/references to calculate translation length ratio for this epoch
|
||||
size_t dev_hypothesis_length;
|
||||
size_t dev_reference_length;
|
||||
size_t dev_hypothesis_length = 0;
|
||||
size_t dev_reference_length = 0;
|
||||
|
||||
delayedWeightUpdates.ZeroAll();
|
||||
|
||||
size_t shardPosition = 0;
|
||||
vector<size_t>::const_iterator sid = shard.begin();
|
||||
@ -682,24 +689,28 @@ int main(int argc, char** argv) {
|
||||
// Run optimiser on batch:
|
||||
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
|
||||
size_t update_status;
|
||||
ScoreComponentCollection weightUpdate;
|
||||
if (perceptron_update) {
|
||||
vector<vector<float> > dummy1;
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
|
||||
featureValuesHope, featureValuesFear, dummy1, dummy1, learning_rate, rank, epoch);
|
||||
}
|
||||
else if (hope_fear) {
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
|
||||
update_status = optimiser->updateWeightsHopeFear(mosesWeights, weightUpdate,
|
||||
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, learning_rate, rank, epoch);
|
||||
}
|
||||
else {
|
||||
// model_hope_fear
|
||||
update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights,
|
||||
update_status = ((MiraOptimiser*) optimiser)->updateWeights(mosesWeights, weightUpdate,
|
||||
featureValues, losses, bleuScores, oracleFeatureValues, oracleBleuScores, learning_rate, rank, epoch);
|
||||
}
|
||||
|
||||
// sumStillViolatedConstraints += update_status;
|
||||
|
||||
if (update_status == 0) { // if weights were updated
|
||||
// apply weight update
|
||||
mosesWeights.PlusEquals(weightUpdate);
|
||||
|
||||
if (normaliseWeights) {
|
||||
mosesWeights.L1Normalise();
|
||||
}
|
||||
@ -718,8 +729,11 @@ int main(int argc, char** argv) {
|
||||
mosesWeights = averageWeights;
|
||||
}
|
||||
|
||||
// set new Moses weights
|
||||
decoder->setWeights(mosesWeights);
|
||||
if (delayUpdates)
|
||||
delayedWeightUpdates.PlusEquals(weightUpdate);
|
||||
else
|
||||
// set new Moses weights
|
||||
decoder->setWeights(mosesWeights);
|
||||
}
|
||||
|
||||
// update history (for approximate document Bleu)
|
||||
@ -830,8 +844,17 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
}
|
||||
}// end dumping
|
||||
|
||||
} // end of shard loop, end of this epoch
|
||||
|
||||
if (delayUpdates) {
|
||||
// apply all updates from this epoch to the weight vector
|
||||
ScoreComponentCollection mosesWeights = decoder->getWeights();
|
||||
mosesWeights.PlusEquals(delayedWeightUpdates);
|
||||
decoder->setWeights(mosesWeights);
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", delayed update, new moses weights: " << mosesWeights << endl;
|
||||
}
|
||||
|
||||
if (stabiliseLength && !fixLength) {
|
||||
float lengthRatio = (float)(dev_hypothesis_length+1) / dev_reference_length;
|
||||
if (lengthRatio >= 1) {
|
||||
|
@ -7,7 +7,9 @@ using namespace std;
|
||||
|
||||
namespace Mira {
|
||||
|
||||
size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
|
||||
size_t MiraOptimiser::updateWeights(
|
||||
ScoreComponentCollection& currWeights,
|
||||
ScoreComponentCollection& weightUpdate,
|
||||
const vector<vector<ScoreComponentCollection> >& featureValues,
|
||||
const vector<vector<float> >& losses,
|
||||
const vector<vector<float> >& bleuScores,
|
||||
@ -142,9 +144,7 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
|
||||
}
|
||||
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
|
||||
|
||||
// apply update to weight vector
|
||||
currWeights.PlusEquals(summedUpdate);
|
||||
weightUpdate.PlusEquals(summedUpdate);
|
||||
|
||||
// Sanity check: are there still violated constraints after optimisation?
|
||||
/* int violatedConstraintsAfter = 0;
|
||||
@ -164,7 +164,9 @@ size_t MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
size_t MiraOptimiser::updateWeightsHopeFear(
|
||||
Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
@ -299,9 +301,7 @@ size_t MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& cur
|
||||
}
|
||||
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
|
||||
|
||||
// apply update to weight vector
|
||||
currWeights.PlusEquals(summedUpdate);
|
||||
weightUpdate.PlusEquals(summedUpdate);
|
||||
|
||||
// Sanity check: are there still violated constraints after optimisation?
|
||||
/* int violatedConstraintsAfter = 0;
|
||||
|
@ -30,7 +30,9 @@ namespace Mira {
|
||||
public:
|
||||
Optimiser() {}
|
||||
|
||||
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
virtual size_t updateWeightsHopeFear(
|
||||
Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
@ -42,7 +44,9 @@ namespace Mira {
|
||||
|
||||
class Perceptron : public Optimiser {
|
||||
public:
|
||||
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
virtual size_t updateWeightsHopeFear(
|
||||
Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
@ -66,6 +70,7 @@ namespace Mira {
|
||||
m_margin_slack(margin_slack) { }
|
||||
|
||||
size_t updateWeights(Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
|
||||
const std::vector<std::vector<float> >& losses,
|
||||
const std::vector<std::vector<float> >& bleuScores,
|
||||
@ -75,6 +80,7 @@ namespace Mira {
|
||||
size_t rank,
|
||||
size_t epoch);
|
||||
virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
|
||||
Moses::ScoreComponentCollection& weightUpdate,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
|
||||
const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
|
||||
const std::vector<std::vector<float> >& bleuScoresHope,
|
||||
|
@ -24,7 +24,9 @@ using namespace std;
|
||||
|
||||
namespace Mira {
|
||||
|
||||
size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
|
||||
size_t Perceptron::updateWeightsHopeFear(
|
||||
ScoreComponentCollection& currWeights,
|
||||
ScoreComponentCollection& weightUpdate,
|
||||
const vector< vector<ScoreComponentCollection> >& featureValuesHope,
|
||||
const vector< vector<ScoreComponentCollection> >& featureValuesFear,
|
||||
const vector< vector<float> >& dummy1,
|
||||
@ -39,7 +41,7 @@ size_t Perceptron::updateWeightsHopeFear(ScoreComponentCollection& currWeights,
|
||||
featureValueDiff.MinusEquals(featureValuesFear[0][0]);
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
|
||||
featureValueDiff.MultiplyEquals(perceptron_learning_rate);
|
||||
currWeights.PlusEquals(featureValueDiff);
|
||||
weightUpdate.PlusEquals(featureValueDiff);
|
||||
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user