For the hope-fear option, add constraints only between hope and fear translations, not between hope translations. Make the hope/fear n-best sizes flexible with --hope-n and --fear-n
git-svn-id: http://svn.statmt.org/repository/mira@3897 cc96ff50-19ce-11e0-b349-13d7f0bd23df
parent a177b58d18
commit 020c71216b
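The core change can be sketched in isolation as follows (a standalone illustration, not the Moses code: the BLEU scores are invented and the real implementation works on ScoreComponentCollection feature vectors). Each of the hope_n hope translations is paired with each of the fear_n fear translations, so constraints between two hope translations are never created:

    #include <iostream>
    #include <vector>

    int main() {
        // Hypothetical sentence-level BLEU scores for one input sentence:
        // hope translations are decoded towards BLEU, fear translations away from it.
        std::vector<float> hopeBleu = {0.45f, 0.42f};        // --hope-n 2
        std::vector<float> fearBleu = {0.20f, 0.25f, 0.28f}; // --fear-n 3

        // New pairing: hope x fear only (6 constraints here), never hope x hope.
        for (std::size_t j = 0; j < hopeBleu.size(); ++j)
            for (std::size_t k = 0; k < fearBleu.size(); ++k)
                std::cout << "constraint " << j << "/" << k
                          << ": loss = " << hopeBleu[j] - fearBleu[k] << "\n";
        return 0;
    }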
mira/Main.cpp
@@ -144,7 +144,6 @@ int main(int argc, char** argv) {
float min_sentence_update;
size_t weightedLossFunction;
size_t n;
size_t nbest_first;
size_t batchSize;
bool distinctNbest;
bool onlyViolatedConstraints;
@@ -190,7 +189,8 @@ int main(int argc, char** argv) {
bool analytical_update;
bool perceptron_update;
bool hope_fear;
size_t constraints;
int hope_n;
int fear_n;
po::options_description desc("Allowed options");
desc.add_options()
("accumulate-most-violated-constraints", po::value<bool>(&accumulateMostViolatedConstraints)->default_value(false),"Accumulate most violated constraint per example")
@@ -206,7 +206,6 @@ int main(int argc, char** argv) {
("burn-in-input-file", po::value<string>(&burnInInputFile), "Input file for burn-in phase of BLEU history")
("burn-in-reference-files", po::value<vector<string> >(&burnInReferenceFiles), "Reference file for burn-in phase of BLEU history")
("config,f", po::value<string>(&mosesConfigFile), "Moses ini file")
("constraints", po::value<size_t>(&constraints)->default_value(1), "Number of constraints used for analytical update")
("control-updates", po::value<bool>(&controlUpdates)->default_value(true), "Ignore updates that increase number of violated constraints AND increase the error")
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
("decr-learning-rate", po::value<float>(&decrease_learning_rate)->default_value(0),"Decrease learning rate by the given value after every epoch")
@@ -215,11 +214,13 @@ int main(int argc, char** argv) {
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use nbest list with distinct translations in inference step")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights, when using mpi")
("epochs,e", po::value<size_t>(&epochs)->default_value(5), "Number of epochs")
("fear-n", po::value<int>(&fear_n)->default_value(-1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("hildreth", po::value<bool>(&hildreth)->default_value(true), "Use Hildreth's optimisation algorithm")
("history-of-1best", po::value<bool>(&historyOf1best)->default_value(0), "Use the 1best translation to update the history")
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
("hope-fear", po::value<bool>(&hope_fear)->default_value(true), "Use only hope and fear translations (not model)")
("hope-n", po::value<int>(&hope_n)->default_value(-1), "Number of hope translations used")
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
("learning-rate", po::value<float>(&learning_rate)->default_value(1), "Learning rate (fixed or flexible)")
@@ -236,8 +237,7 @@ int main(int argc, char** argv) {
("msf-step", po::value<float>(&marginScaleFactorStep)->default_value(0), "Decrease margin scale factor iteratively by the value provided")
("multiplyA", po::value<bool>(&multiplyA)->default_value(true), "Multiply A with outcome before passing to Hildreth")
("nbest,n", po::value<size_t>(&n)->default_value(10), "Number of translations in nbest list")
("nbest-first", po::value<size_t>(&nbest_first)->default_value(0), "Number of translations in nbest list in the first epoch")
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("normalise", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("only-violated-constraints", po::value<bool>(&onlyViolatedConstraints)->default_value(false), "Add only violated constraints to the optimisation problem")
("past-and-current-constraints", po::value<bool>(&pastAndCurrentConstraints)->default_value(false), "Accumulate most violated constraint per example and use them along all current constraints")
("perceptron-update", po::value<bool>(&perceptron_update)->default_value(false), "Do a simple perceptron style update")
@@ -295,8 +295,9 @@ int main(int argc, char** argv) {
return 1;
}

if (nbest_first == 0) {
nbest_first = n;
if (hope_n == -1 && fear_n == -1) {
hope_n = n;
fear_n = n;
}
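// If neither --hope-n nor --fear-n is given (both default to -1),
// both fall back to the regular n-best size n.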

// load input and references
@@ -486,7 +487,6 @@ int main(int argc, char** argv) {
cerr << "msf-min: " << marginScaleFactorMin << endl;
cerr << "weighted-loss-function: " << weightedLossFunction << endl;
cerr << "nbest: " << n << endl;
cerr << "nbest-first: " << nbest_first << endl;
cerr << "batch-size: " << batchSize << endl;
cerr << "distinct-nbest: " << distinctNbest << endl;
cerr << "only-violated-constraints: " << onlyViolatedConstraints << endl;
@@ -523,6 +523,8 @@ int main(int argc, char** argv) {
cerr << "perceptron-update: " << perceptron_update << endl;
cerr << "analytical-update: " << analytical_update << endl;
cerr << "hope-fear: " << hope_fear << endl;
cerr << "hope-n: " << hope_n << endl;
cerr << "fear-n: " << fear_n << endl;

if (learner == "mira") {
cerr << "Optimising using Mira" << endl;
@@ -608,6 +610,12 @@ int main(int argc, char** argv) {
vector<vector<float> > bleuScores;
vector<vector<float> > dummyBleuScores;

// variables for hope-fear setting
vector<vector<ScoreComponentCollection> > featureValuesHope;
vector<vector<ScoreComponentCollection> > featureValuesFear;
vector<vector<float> > bleuScoresHope;
vector<vector<float> > bleuScoresFear;

// get moses weights
ScoreComponentCollection mosesWeights = decoder->getWeights();
cerr << "\nRank " << rank << ", next batch" << endl;
@@ -632,77 +640,22 @@ int main(int argc, char** argv) {

vector<ScoreComponentCollection> newFeatureValues;
vector<float> newBleuScores;
featureValues.push_back(newFeatureValues);
dummyFeatureValues.push_back(newFeatureValues);
bleuScores.push_back(newBleuScores);
dummyBleuScores.push_back(newBleuScores);

size_t pass_n = (epoch == 0)? nbest_first : n;

if (perceptron_update || analytical_update) {
if (constraints == 1) {
if (historyOf1best) {
// MODEL (for updating the history)
cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
oneBests.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl;
}

// HOPE
cerr << "Rank " << rank << ", run decoder to get 1best hope translations" << endl;
size_t oraclePos = dummyFeatureValues[batchPosition].size();
vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank);
// needed for history
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << dummyBleuScores[batchPosition][oraclePos] << endl;

oracleFeatureValues.push_back(dummyFeatureValues[batchPosition][oraclePos]);
oracleBleuScores.push_back(dummyBleuScores[batchPosition][oraclePos]);
// clear dummies
dummyFeatureValues[batchPosition].clear();
dummyBleuScores[batchPosition].clear();

// FEAR
cerr << "Rank " << rank << ", run decoder to get 1best fear translations" << endl;
size_t fearPos = featureValues[batchPosition].size();
vector<const Word*> fear = decoder->getNBest(input, *sid, 1, -1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl;
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
}
else {
// TODO:
}
if (hope_fear) {
featureValuesHope.push_back(newFeatureValues);
featureValuesFear.push_back(newFeatureValues);
bleuScoresHope.push_back(newBleuScores);
bleuScoresFear.push_back(newBleuScores);
}
else {
if (!hope_fear) {
// MODEL
cerr << "Rank " << rank << ", run decoder to get " << pass_n << "best wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, pass_n, 0.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
oneBests.push_back(bestModel);
// needed for calculating bleu of dev (1best translations) // todo:
all_ref_ids.push_back(*sid);
allBestModelScore.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;
}
else if (historyOf1best) {
// MODEL (for updating the history only, using dummy vectors)
featureValues.push_back(newFeatureValues);
dummyFeatureValues.push_back(newFeatureValues);
bleuScores.push_back(newBleuScores);
dummyBleuScores.push_back(newBleuScores);
}

if (perceptron_update || analytical_update) {
if (historyOf1best) {
// MODEL (for updating the history)
cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
@@ -713,33 +666,114 @@ int main(int argc, char** argv) {
}

// HOPE
cerr << "Rank " << rank << ", run decoder to get " << pass_n << "best hope translations" << endl;
size_t oraclePos = featureValues[batchPosition].size();
vector<const Word*> oracle = decoder->getNBest(input, *sid, pass_n, 1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
cerr << "Rank " << rank << ", run decoder to get 1best hope translations" << endl;
size_t oraclePos = dummyFeatureValues[batchPosition].size();
vector<const Word*> oracle = decoder->getNBest(input, *sid, 1, 1.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank);
// needed for history
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << dummyBleuScores[batchPosition][oraclePos] << endl;

oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]);
oracleFeatureValues.push_back(dummyFeatureValues[batchPosition][oraclePos]);
oracleBleuScores.push_back(dummyBleuScores[batchPosition][oraclePos]);
// clear dummies
dummyFeatureValues[batchPosition].clear();
dummyBleuScores[batchPosition].clear();

// FEAR
cerr << "Rank " << rank << ", run decoder to get " << pass_n << "best fear translations" << endl;
cerr << "Rank " << rank << ", run decoder to get 1best fear translations" << endl;
size_t fearPos = featureValues[batchPosition].size();
vector<const Word*> fear = decoder->getNBest(input, *sid, pass_n, -1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
vector<const Word*> fear = decoder->getNBest(input, *sid, 1, -1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl;
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
}
else {
if (hope_fear) {
if (historyOf1best) {
// MODEL (for updating the history only, using dummy vectors)
cerr << "Rank " << rank << ", run decoder to get " << 1 << "best wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, 1, 0.0, bleuScoreWeight,
dummyFeatureValues[batchPosition], dummyBleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
oneBests.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << dummyBleuScores[batchPosition][0] << endl;
}
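// Note: the fourth getNBest() argument appears to weight sentence BLEU in the
// search objective: +1.0 decodes towards BLEU (hope), -1.0 away from it
// (fear), and 0.0 is plain model score.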

// HOPE
cerr << "Rank " << rank << ", run decoder to get " << hope_n << "best hope translations" << endl;
vector<const Word*> oracle = decoder->getNBest(input, *sid, hope_n, 1.0, bleuScoreWeight,
featureValuesHope[batchPosition], bleuScoresHope[batchPosition], true,
distinctNbest, rank);
// needed for history
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScoresHope[batchPosition][0] << endl;

// FEAR
cerr << "Rank " << rank << ", run decoder to get " << fear_n << "best fear translations" << endl;
vector<const Word*> fear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuScoreWeight,
featureValuesFear[batchPosition], bleuScoresFear[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScoresFear[batchPosition][0] << endl;
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
}
else {
// MODEL
cerr << "Rank " << rank << ", run decoder to get " << n << "best wrt model score" << endl;
vector<const Word*> bestModel = decoder->getNBest(input, *sid, n, 0.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
oneBests.push_back(bestModel);
// needed for calculating bleu of dev (1best translations) // todo:
all_ref_ids.push_back(*sid);
allBestModelScore.push_back(bestModel);
cerr << "Rank " << rank << ", model length: " << bestModel.size() << " Bleu: " << bleuScores[batchPosition][0] << endl;

// HOPE
cerr << "Rank " << rank << ", run decoder to get " << n << "best hope translations" << endl;
size_t oraclePos = featureValues[batchPosition].size();
vector<const Word*> oracle = decoder->getNBest(input, *sid, n, 1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
// needed for history
inputLengths.push_back(decoder->getCurrentInputLength());
ref_ids.push_back(*sid);
decoder->cleanup();
oracles.push_back(oracle);
cerr << "Rank " << rank << ", oracle length: " << oracle.size() << " Bleu: " << bleuScores[batchPosition][oraclePos] << endl;

oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]);

// FEAR
cerr << "Rank " << rank << ", run decoder to get " << n << "best fear translations" << endl;
size_t fearPos = featureValues[batchPosition].size();
vector<const Word*> fear = decoder->getNBest(input, *sid, n, -1.0, bleuScoreWeight,
featureValues[batchPosition], bleuScores[batchPosition], true,
distinctNbest, rank);
decoder->cleanup();
cerr << "Rank " << rank << ", fear length: " << fear.size() << " Bleu: " << bleuScores[batchPosition][fearPos] << endl;
for (size_t i = 0; i < fear.size(); ++i) {
delete fear[i];
}
}
}

// cerr << "Rank " << rank << ", sentence " << *sid << ", best model Bleu (approximate sentence bleu): " << bleuScores[batchPosition][0] << endl;
// summedApproxBleu += bleuScores[batchPosition][0];
@@ -750,12 +784,13 @@ int main(int argc, char** argv) {
++shardPosition;
} // end of batch loop

// Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis)
vector<vector<float> > losses(actualBatchSize);
for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) {
for (size_t j = 0; j < bleuScores[batchPosition].size(); ++j) {
losses[batchPosition].push_back(oracleBleuScores[batchPosition]
- bleuScores[batchPosition][j]);
if (!hope_fear) {
// Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis)
for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) {
for (size_t j = 0; j < bleuScores[batchPosition].size(); ++j) {
losses[batchPosition].push_back(oracleBleuScores[batchPosition] - bleuScores[batchPosition][j]);
}
}
}

@@ -766,11 +801,21 @@ int main(int argc, char** argv) {

if (logFeatureValues) {
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t j = 0; j < featureValues[i].size(); ++j) {
featureValues[i][j].ApplyLog(baseOfLog);
if (hope_fear) {
for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
featureValuesHope[i][j].ApplyLog(baseOfLog);
}
for (size_t j = 0; j < featureValuesFear[i].size(); ++j) {
featureValuesFear[i][j].ApplyLog(baseOfLog);
}
}
else {
for (size_t j = 0; j < featureValues[i].size(); ++j) {
featureValues[i][j].ApplyLog(baseOfLog);
}

oracleFeatureValues[i].ApplyLog(baseOfLog);
oracleFeatureValues[i].ApplyLog(baseOfLog);
}
}
}

@@ -786,12 +831,29 @@ int main(int argc, char** argv) {
// optionally print out the feature values
if (print_feature_values) {
cerr << "\nRank " << rank << ", epoch " << epoch << ", feature values: " << endl;
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t j = 0; j < featureValues[i].size(); ++j) {
cerr << featureValues[i][j] << endl;
if (hope_fear) {
cerr << "hope: " << endl;
for (size_t i = 0; i < featureValuesHope.size(); ++i) {
for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
cerr << featureValuesHope[i][j] << endl;
}
}
cerr << "fear: " << endl;
for (size_t i = 0; i < featureValuesFear.size(); ++i) {
for (size_t j = 0; j < featureValuesFear[i].size(); ++j) {
cerr << featureValuesFear[i][j] << endl;
}
}
cerr << endl;
}
else {
for (size_t i = 0; i < featureValues.size(); ++i) {
for (size_t j = 0; j < featureValues[i].size(); ++j) {
cerr << featureValues[i][j] << endl;
}
}
cerr << endl;
}
cerr << endl;
}

// Run optimiser on batch:
@@ -818,9 +880,16 @@ int main(int argc, char** argv) {
learning_rate, max_sentence_update, rank, epoch, controlUpdates);
}
else {
update_status = optimiser->updateWeights(mosesWeights, featureValues,
losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
learning_rate, max_sentence_update, rank, epoch, updates_per_epoch, controlUpdates);
if (hope_fear) {
update_status = optimiser->updateWeightsHopeFear(mosesWeights,
featureValuesHope, featureValuesFear, bleuScoresHope, bleuScoresFear, ref_ids,
learning_rate, max_sentence_update, rank, epoch, updates_per_epoch, controlUpdates);
}
else {
update_status = optimiser->updateWeights(mosesWeights, featureValues,
losses, bleuScores, oracleFeatureValues, oracleBleuScores, ref_ids,
learning_rate, max_sentence_update, rank, epoch, updates_per_epoch, controlUpdates);
}
}

if (update_status[0] == 1) {
@@ -11,8 +11,12 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
const vector<vector<float> >& losses,
const vector<vector<float> >& bleuScores,
const vector<ScoreComponentCollection>& oracleFeatureValues,
const vector<float> oracleBleuScores, const vector<size_t> sentenceIds,
float learning_rate, float max_sentence_update, size_t rank, size_t epoch,
const vector<float> oracleBleuScores,
const vector<size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates) {

@@ -79,6 +83,7 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
featureValueDiff.MinusEquals(featureValues[i][j]);
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
if (modelScoreDiff == 0) {
cerr << "equal feature values, constraint skipped.." << endl;
continue;
}

@@ -312,6 +317,280 @@ vector<int> MiraOptimiser::updateWeights(ScoreComponentCollection& currWeights,
return statusPlus;
}
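// Reference note (standard MIRA formulation, inferred from the code below,
// not documentation shipped with this commit): with constraint vectors
// d_k = h(hope) - h(fear) and losses l_k, Hildreth's method solves
//   min_{w'} 1/2 * ||w' - w||^2   s.t.   w' . d_k >= l_k   for all k,
// and returns the dual variables alpha_k, giving the update
//   w' = w + sum_k alpha_k * d_k.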

vector<int> MiraOptimiser::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates) {

// vector of feature values differences for all created constraints
vector<ScoreComponentCollection> featureValueDiffs;
vector<float> lossMinusModelScoreDiffs;
vector<float> all_losses;

// most violated constraint in batch
ScoreComponentCollection max_batch_featureValueDiff;
float max_batch_loss = -1;
float max_batch_lossMinusModelScoreDiff = -1;

// Make constraints for new hypothesis translations
float epsilon = 0.0001;
int violatedConstraintsBefore = 0;
float oldDistanceFromOptimum = 0;

// iterate over input sentences (1 (online) or more (batch))
for (size_t i = 0; i < featureValuesHope.size(); ++i) {
size_t sentenceId = sentenceIds[i];

// Pair all hope translations with all fear translations for one input sentence
for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
for (size_t k = 0; k < featureValuesFear[i].size(); ++k) {
ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
featureValueDiff.MinusEquals(featureValuesFear[i][k]);
cerr << "feature value diff: " << featureValueDiff << endl;
float modelScoreDiff = featureValueDiff.InnerProduct(currWeights);
if (modelScoreDiff == 0) {
cerr << "equal feature values, constraint skipped.." << endl;
continue;
}

float loss = bleuScoresHope[i][j] - bleuScoresFear[i][k];
loss *= m_marginScaleFactor;
if (m_weightedLossFunction == 1) {
loss *= bleuScoresHope[i][j];
}
else if (m_weightedLossFunction == 2) {
loss *= log2(bleuScoresHope[i][j]);
}
else if (m_weightedLossFunction == 10) {
loss *= log10(bleuScoresHope[i][j]);
}
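// Net effect: loss = m_marginScaleFactor * (BLEU_hope - BLEU_fear), further
// multiplied by BLEU_hope, log2(BLEU_hope) or log10(BLEU_hope) when
// m_weightedLossFunction is 1, 2 or 10.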

// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float diff = loss - modelScoreDiff;
cerr << "constraint: " << modelScoreDiff << " >= " << loss << endl;
if (diff > (epsilon + m_precision)) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << diff << " (loss: " << loss << ")" << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}

float lossMinusModelScoreDiff = loss - modelScoreDiff;
if (violated) {
if (m_accumulateMostViolatedConstraints || m_pastAndCurrentConstraints) {
// find the most violated constraint per batch
if (lossMinusModelScoreDiff > max_batch_lossMinusModelScoreDiff) {
max_batch_lossMinusModelScoreDiff = lossMinusModelScoreDiff;
max_batch_featureValueDiff = featureValueDiff;
max_batch_loss = loss;
}
}
}

if (addConstraint && !m_accumulateMostViolatedConstraints) {
featureValueDiffs.push_back(featureValueDiff);
lossMinusModelScoreDiffs.push_back(lossMinusModelScoreDiff);
all_losses.push_back(loss);

if (violated) {
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;
}
}
}
}
}

if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of current constraints: " << featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of current violated constraints: " << violatedConstraintsBefore << endl;
}

if (m_max_number_oracles == 1) {
for (size_t k = 0; k < sentenceIds.size(); ++k) {
size_t sentenceId = sentenceIds[k];
m_oracles[sentenceId].clear();
}
}

size_t pastViolatedConstraints = 0;
// Add constraints from past iterations (BEFORE updating that list)
if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
// add all past (most violated) constraints to the list of current constraints, computed with current weights!
for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
float modelScoreDiff = m_featureValueDiffs[i].InnerProduct(currWeights);

// check if constraint is violated
bool violated = false;
bool addConstraint = true;
float diff = m_losses[i] - modelScoreDiff;
if (diff > (epsilon + m_precision)) {
violated = true;
cerr << "Rank " << rank << ", epoch " << epoch << ", past violation: " << diff << " (loss: " << m_losses[i] << ")" << endl;
}
else if (m_onlyViolatedConstraints) {
addConstraint = false;
}

if (addConstraint) {
featureValueDiffs.push_back(m_featureValueDiffs[i]);
lossMinusModelScoreDiffs.push_back(m_losses[i] - modelScoreDiff);
all_losses.push_back(m_losses[i]);
// cerr << "old constraint: " << modelScoreDiff << " >= " << m_losses[i] << endl;

if (violated) {
++violatedConstraintsBefore;
++pastViolatedConstraints;
oldDistanceFromOptimum += diff;
}
}
}
}

if (m_pastAndCurrentConstraints || m_accumulateMostViolatedConstraints) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of past constraints: " << m_featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of past violated constraints: " << pastViolatedConstraints << endl;
}

// Add new most violated constraint to the list of current constraints
if (m_accumulateMostViolatedConstraints) {
if (max_batch_loss != -1) {
float modelScoreDiff = max_batch_featureValueDiff.InnerProduct(currWeights);
float diff = max_batch_loss - modelScoreDiff;
++violatedConstraintsBefore;
oldDistanceFromOptimum += diff;

featureValueDiffs.push_back(max_batch_featureValueDiff);
lossMinusModelScoreDiffs.push_back(max_batch_loss - modelScoreDiff);
all_losses.push_back(max_batch_loss);
// cerr << "new constraint: " << modelScoreDiff << " !>= " << max_batch_loss << endl;
}
}

// Update the list of accumulated most violated constraints
if (max_batch_loss != -1) {
bool updated = false;
for (size_t i = 0; i < m_featureValueDiffs.size(); ++i) {
float oldScore = m_featureValueDiffs[i].InnerProduct(currWeights);
float newScore = max_batch_featureValueDiff.InnerProduct(currWeights);
if (abs(oldScore-newScore) < epsilon) {
m_losses[i] = max_batch_loss;
updated = true;
break;
}
}

if (!updated) {
m_featureValueDiffs.push_back(max_batch_featureValueDiff);
m_losses.push_back(max_batch_loss);
}
}

// run optimisation: compute alphas for all given constraints
vector<float> alphas;
ScoreComponentCollection summedUpdate;
if (violatedConstraintsBefore > 0) {
cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << featureValueDiffs.size() << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", number of violated constraints passed to optimizer: " << violatedConstraintsBefore << endl;
if (m_slack != 0) {
alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack);
} else {
alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs);
}
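// Assumption (standard soft-margin MIRA, not spelled out in this commit):
// a non-zero m_slack caps the alphas returned by Hildreth's method, bounding
// how much any single constraint can move the weights.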

// Update the weight vector according to the alphas and the feature value differences
// * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis))
for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
float alpha = alphas[k];
cerr << "alpha: " << alpha << endl;
ScoreComponentCollection update(featureValueDiffs[k]);
update.MultiplyEquals(alpha);

// sum up update
summedUpdate.PlusEquals(update);
}
}
else {
cerr << "Rank " << rank << ", epoch " << epoch << ", check, no constraint violated for this batch" << endl;
vector<int> status(3);
status[0] = 1;
status[1] = 0;
status[2] = 0;
return status;
}
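// Status convention, as read by Main.cpp: status[0] == 1 means no violated
// constraints (no update), 0 means an update was computed, and -1 (set below
// under --control-updates) means the update was rejected.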

ScoreComponentCollection newWeights(currWeights);
newWeights.PlusEquals(summedUpdate);

// Sanity check: are there still violated constraints after optimisation?
int violatedConstraintsAfter = 0;
float newDistanceFromOptimum = 0;
for (size_t i = 0; i < featureValueDiffs.size(); ++i) {
float modelScoreDiff = featureValueDiffs[i].InnerProduct(newWeights);
float loss = all_losses[i];
float diff = loss - modelScoreDiff;
if (diff > (epsilon + m_precision)) {
++violatedConstraintsAfter;
newDistanceFromOptimum += diff;
}
}
cerr << "Rank " << rank << ", epoch " << epoch << ", check, violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl;

if (controlUpdates && violatedConstraintsAfter > 0) {
float distanceChange = oldDistanceFromOptimum - newDistanceFromOptimum;
if ((violatedConstraintsBefore - violatedConstraintsAfter) <= 0 && distanceChange < 0) {
vector<int> statusPlus(3);
statusPlus[0] = -1;
statusPlus[1] = -1;
statusPlus[2] = -1;
return statusPlus;
}
}

// Apply learning rate (fixed or flexible)
if (learning_rate != 1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update before applying learning rate: " << summedUpdate << endl;
summedUpdate.MultiplyEquals(learning_rate);
cerr << "Rank " << rank << ", epoch " << epoch << ", update after applying learning rate: " << summedUpdate << endl;
}

// Apply threshold scaling
if (max_sentence_update != -1) {
cerr << "Rank " << rank << ", epoch " << epoch << ", update before scaling to max-sentence-update: " << summedUpdate << endl;
summedUpdate.ThresholdScaling(max_sentence_update);
cerr << "Rank " << rank << ", epoch " << epoch << ", update after scaling to max-sentence-update: " << summedUpdate << endl;
}

// Apply update to weight vector or store it for later
if (updates_per_epoch > 0) {
m_accumulatedUpdates.PlusEquals(summedUpdate);
cerr << "Rank " << rank << ", epoch " << epoch << ", new accumulated updates:" << m_accumulatedUpdates << endl;
} else {
// apply update to weight vector
cerr << "Rank " << rank << ", epoch " << epoch << ", weights before update: " << currWeights << endl;
currWeights.PlusEquals(summedUpdate);
cerr << "Rank " << rank << ", epoch " << epoch << ", weights after update: " << currWeights << endl;
}

vector<int> statusPlus(3);
statusPlus[0] = 0;
statusPlus[1] = violatedConstraintsBefore;
statusPlus[2] = violatedConstraintsAfter;
return statusPlus;
}
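// Minimal calling sketch (hypothetical values; the real call site is the
// hope_fear branch in Main.cpp above). Sizes must line up: one feature
// vector per BLEU score, one inner vector per input sentence.
//   vector<vector<ScoreComponentCollection> > hopeFeats(1), fearFeats(1);
//   hopeFeats[0].resize(1); fearFeats[0].resize(1);
//   vector<vector<float> > hopeBleu(1, vector<float>(1, 0.4f));
//   vector<vector<float> > fearBleu(1, vector<float>(1, 0.2f));
//   vector<size_t> ids(1, 0);
//   vector<int> status = optimiser->updateWeightsHopeFear(weights,
//       hopeFeats, fearFeats, hopeBleu, fearBleu, ids,
//       /*learning_rate*/ 1.0, /*max_sentence_update*/ -1.0,
//       /*rank*/ 0, /*epoch*/ 0, /*updates_per_epoch*/ 0,
//       /*controlUpdates*/ true);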

vector<int> MiraOptimiser::updateWeightsAnalytically(ScoreComponentCollection& currWeights,
ScoreComponentCollection& featureValues,
float loss,
@@ -29,7 +29,7 @@ namespace Mira {
class Optimiser {
public:
Optimiser() {}
virtual std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& weights,
virtual std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& featureValues,
float loss,
Moses::ScoreComponentCollection& oracleFeatureValues,
@@ -40,24 +40,36 @@ namespace Mira {
size_t rank,
size_t epoch,
bool controlUpdates) = 0;
virtual std::vector<int> updateWeights(Moses::ScoreComponentCollection& weights,
virtual std::vector<int> updateWeights(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
const std::vector< float> oracleBleuScores,
const std::vector< size_t> sentenceId,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates) = 0;
virtual std::vector<int> updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates) = 0;
};

class Perceptron : public Optimiser {
public:
virtual std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& weights,
virtual std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& featureValues,
float loss,
Moses::ScoreComponentCollection& oracleFeatureValues,
@@ -68,19 +80,31 @@ namespace Mira {
size_t rank,
size_t epoch,
bool controlUpdates);
virtual std::vector<int> updateWeights(Moses::ScoreComponentCollection& weights,
virtual std::vector<int> updateWeights(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector<Moses::ScoreComponentCollection>& oracleFeatureValues,
const std::vector< float> oracleBleuScores,
const std::vector< size_t> dummy,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates);
virtual std::vector<int> updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates);
};

class MiraOptimiser : public Optimiser {
@@ -105,7 +129,7 @@ namespace Mira {

~MiraOptimiser() {}

virtual std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& weights,
virtual std::vector<int> updateWeightsAnalytically(Moses::ScoreComponentCollection& currWeights,
Moses::ScoreComponentCollection& featureValues,
float loss,
Moses::ScoreComponentCollection& oracleFeatureValues,
@@ -116,13 +140,25 @@ namespace Mira {
size_t rank,
size_t epoch,
bool controlUpdates);
virtual std::vector<int> updateWeights(Moses::ScoreComponentCollection& weights,
virtual std::vector<int> updateWeights(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
const std::vector< std::vector<float> >& losses,
const std::vector<std::vector<float> >& bleuScores,
const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
const std::vector< float> oracleBleuScores,
const std::vector< size_t> sentenceId,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates);
virtual std::vector<int> updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceIds,
float learning_rate,
float max_sentence_update,
size_t rank,
@@ -24,21 +24,40 @@ using namespace std;

namespace Mira {

vector<int> Perceptron::updateWeightsAnalytically(ScoreComponentCollection& currWeights,
ScoreComponentCollection& featureValues,
float loss,
ScoreComponentCollection& oracleFeatureValues,
float oracleBleuScore,
size_t sentenceId,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates) {
vector<int> status(1);
status[0] = 0;
return status;
}
vector<int> Perceptron::updateWeightsAnalytically(ScoreComponentCollection& currWeights,
ScoreComponentCollection& featureValues,
float loss,
ScoreComponentCollection& oracleFeatureValues,
float oracleBleuScore,
size_t sentenceId,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
bool controlUpdates) {

vector<int> status(1);
status[0] = 0;
return status;
}

vector<int> Perceptron::updateWeightsHopeFear(Moses::ScoreComponentCollection& currWeights,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
const std::vector<std::vector<float> >& bleuScoresHope,
const std::vector<std::vector<float> >& bleuScoresFear,
const std::vector< size_t> sentenceId,
float learning_rate,
float max_sentence_update,
size_t rank,
size_t epoch,
int updates_per_epoch,
bool controlUpdates) {

vector<int> status(1);
status[0] = 0;
return status;
}

vector<int> Perceptron::updateWeights(ScoreComponentCollection& currWeights,
const vector< vector<ScoreComponentCollection> >& featureValues,